# Model Inspect

In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import torch
from tqdm import tqdm
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from huggingface_hub import HfApi
import humanize


def inspect_model_details(model_names):
    """
    Inspect multiple models for a comprehensive comparison, including
    architecture, tokenizer, and Hub metadata.
    """
    print("🔍 COMPREHENSIVE MODEL INSPECTION")
    print("=" * 80)

    # Inisialisasi API untuk mengambil data dari Hugging Face Hub
    hf_api = HfApi()
    results = {}

    for model_name in model_names:
        print(f"\n📋 Inspecting: {model_name}")
        print("-" * 60)

        try:
            # 1. Inspeksi Konfigurasi
            config = AutoConfig.from_pretrained(model_name)
            info = {
                "model_type": config.model_type,
                # Detail Arsitektur
                "hidden_size": getattr(config, "hidden_size", "N/A"),
                "num_layers": getattr(config, "num_hidden_layers", "N/A"),
                "num_heads": getattr(config, "num_attention_heads", "N/A"),
                "num_parameters": (
                    humanize.intword(config.num_parameters())
                    if hasattr(config, "num_parameters")
                    and callable(config.num_parameters)
                    else "N/A"
                ),
                # Detail Klasifikasi
                "num_labels": config.num_labels,
                "labels": dict(config.id2label) if hasattr(config, "id2label") else {},
                "problem_type": getattr(config, "problem_type", "Not specified"),
            }

            print("   [Architecture]")
            print(f"   - Model Type: {info['model_type']}")
            print(f"   - Parameters: {info['num_parameters']}")
            print(
                f"   - Layers: {info['num_layers']}, Hidden Size: {info['hidden_size']}, Heads: {info['num_heads']}"
            )

            print("\n   [Classification Task]")
            print(f"   - Problem Type: {info['problem_type']}")
            print(f"   - Number of Labels: {info['num_labels']}")
            if info["labels"]:
                print(f"   - Categories: {list(info['labels'].values())}")

            # 2. Inspeksi Tokenizer
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                info["tokenizer_class"] = tokenizer.__class__.__name__
                info["vocab_size"] = humanize.intword(tokenizer.vocab_size)

                print("\n   [Tokenizer]")
                print(f"   - Class: {info['tokenizer_class']}")
                print(f"   - Vocabulary Size: {info['vocab_size']}")
            except Exception as tokenizer_error:
                print(f"\n   [Tokenizer]")
                print(f"   - ❌ Error loading tokenizer: {tokenizer_error}")
                info["tokenizer_error"] = str(tokenizer_error)

            # 3. Inspeksi Metadata dari Hugging Face Hub (with better error handling)
            try:
                model_info_hub = hf_api.model_info(model_name)

                # Safe access to attributes
                downloads = getattr(model_info_hub, "downloads", 0)
                likes = getattr(model_info_hub, "likes", 0)
                last_modified = getattr(model_info_hub, "lastModified", None)

                info["downloads"] = humanize.intword(downloads) if downloads else "N/A"
                info["likes"] = humanize.intword(likes) if likes else "N/A"

                # Safe date formatting
                if last_modified:
                    if hasattr(last_modified, "strftime"):
                        info["last_modified"] = last_modified.strftime("%Y-%m-%d")
                    else:
                        info["last_modified"] = str(last_modified).split("T")[0]
                else:
                    info["last_modified"] = "N/A"

                print("\n   [Hub Info]")
                print(f"   - Downloads: {info['downloads']}")
                print(f"   - Likes: {info['likes']}")
                print(f"   - Last Modified: {info['last_modified']}")

            except Exception as hub_error:
                print(f"\n   [Hub Info]")
                print(f"   - ❌ Error accessing Hub info: {hub_error}")
                info["hub_error"] = str(hub_error)
                info["downloads"] = "N/A"
                info["likes"] = "N/A"
                info["last_modified"] = "N/A"

            results[model_name] = info

        except Exception as e:
            print(f"   ❌ Error processing {model_name}: {e}")
            results[model_name] = {"error": str(e)}

    return results

# BERT Model Test

In [3]:
# 1. Inisialisasi pipeline "fill-mask"
# Ini akan mengunduh model jika belum ada di cache
print("Mengunduh model (jika diperlukan)...")
tebak_kata = pipeline("fill-mask", model="cahya/bert-base-indonesian-1.5G")
print("Model siap digunakan.")

# 2. Siapkan beberapa kalimat tes
kalimat1 = "Ibu kota negara Indonesia adalah [MASK]."
kalimat2 = "Orang yang bekerja di rumah sakit biasanya adalah seorang [MASK]."
kalimat3 = "Setelah lelah bekerja seharian, enaknya minum [MASK] dingin."
kalimat4 = "Dia membeli mobil baru berwarna [MASK]."

# 3. Lakukan prediksi dan lihat hasilnya
print(f"\n--- Tes untuk: '{kalimat1}' ---")
hasil1 = tebak_kata(kalimat1)
for prediksi in hasil1:
    print(
        f"Kata: {prediksi['token_str']:<15} | Skor Keyakinan: {prediksi['score']:.4f}"
    )

print(f"\n--- Tes untuk: '{kalimat2}' ---")
hasil2 = tebak_kata(kalimat2, top_k=3)  # Minta 3 tebakan teratas
for prediksi in hasil2:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

print(f"\n--- Tes untuk: '{kalimat3}' ---")
hasil3 = tebak_kata(kalimat3, top_k=3)
for prediksi in hasil3:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

print(f"\n--- Tes untuk: '{kalimat4}' ---")
hasil4 = tebak_kata(kalimat4, top_k=3)
for prediksi in hasil4:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

Mengunduh model (jika diperlukan)...


Some weights of the model checkpoint at cahya/bert-base-indonesian-1.5G were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


Model siap digunakan.

--- Tes untuk: 'Ibu kota negara Indonesia adalah [MASK].' ---
Kata: jakarta         | Skor Keyakinan: 0.5408
Kata: yogyakarta      | Skor Keyakinan: 0.0404
Kata: pontianak       | Skor Keyakinan: 0.0294
Kata: makassar        | Skor Keyakinan: 0.0170
Kata: merauke         | Skor Keyakinan: 0.0158

--- Tes untuk: 'Orang yang bekerja di rumah sakit biasanya adalah seorang [MASK].' ---
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang dokter.
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang perawat.
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang bidan.

--- Tes untuk: 'Setelah lelah bekerja seharian, enaknya minum [MASK] dingin.' ---
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum air dingin.
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum minuman dingin.
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum teh dingin.

--- Tes untuk: 'Dia membeli mo

# 🌍 XLM-RoBERTa Model Testing

Testing Facebook's XLM-RoBERTa (Cross-lingual RoBERTa) model which supports 100 languages including Indonesian. This model can be useful for:
- Text classification tasks in multiple languages
- Feature extraction for bot detection
- Cross-lingual text analysis
- Masked language modeling

In [4]:
# Install required packages for XLM-RoBERTa
import subprocess
import sys

def install_package(package):
    try:
        __import__(package)
        print(f"✅ {package} already installed")
    except ImportError:
        print(f"📦 Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"✅ {package} installed successfully")

# Install transformers if not already installed
install_package("transformers")
install_package("torch")

print("\n🎉 All packages ready for XLM-RoBERTa testing!")

✅ transformers already installed
✅ torch already installed

🎉 All packages ready for XLM-RoBERTa testing!


In [5]:
# Basic XLM-RoBERTa Model Testing
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM
import torch

print("🚀 Loading XLM-RoBERTa Model...")
print("⚠️  This might take a few minutes for the first time (downloading ~2.2GB)")

# Method 1: Using pipeline (High-level API)
try:
    # Load the fill-mask pipeline
    xlm_roberta_pipe = pipeline(
        "fill-mask",
        model="FacebookAI/xlm-roberta-large",
        tokenizer="FacebookAI/xlm-roberta-large"
    )
    print("✅ XLM-RoBERTa pipeline loaded successfully!")

    # Test with English
    english_test = "I think this account is a <mask>."
    print(f"\n🇺🇸 English test: '{english_test}'")
    english_results = xlm_roberta_pipe(english_test, top_k=5)

    for i, result in enumerate(english_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with Indonesian
    indonesian_test = "Saya pikir akun ini adalah <mask>."
    print(f"\n🇮🇩 Indonesian test: '{indonesian_test}'")
    indonesian_results = xlm_roberta_pipe(indonesian_test, top_k=5)

    for i, result in enumerate(indonesian_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with china
    chinese_test = "我认为这个账户是一个<mask>。"
    print(f"\n🇨🇳 Chinese test: '{chinese_test}'")
    chinese_results = xlm_roberta_pipe(chinese_test, top_k=5)

    for i, result in enumerate(chinese_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with arabic
    arabic_test = "أعتقد أن هذا الحساب هو <mask>."
    print(f"\n🇸🇦 Arabic test: '{arabic_test}'")
    arabic_results = xlm_roberta_pipe(arabic_test, top_k=5)

    for i, result in enumerate(arabic_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with japanese
    japanese_test = "このアカウントは<mask>だと思います。"
    print(f"\n🇯🇵 Japanese test: '{japanese_test}'")
    japanese_results = xlm_roberta_pipe(japanese_test, top_k=5)

    for i, result in enumerate(japanese_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with korean
    korean_test = "이 계정은 <mask>라고 생각합니다."
    print(f"\n🇰🇷 Korean test: '{korean_test}'")
    korean_results = xlm_roberta_pipe(korean_test, top_k=5)

    for i, result in enumerate(korean_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with spanish
    spanish_test = "Creo que esta cuenta es un <mask>."
    print(f"\n🇪🇸 Spanish test: '{spanish_test}'")
    spanish_results = xlm_roberta_pipe(spanish_test, top_k=5)

    for i, result in enumerate(spanish_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

except Exception as e:
    print(f"❌ Error loading pipeline: {e}")
    xlm_roberta_pipe = None

🚀 Loading XLM-RoBERTa Model...
⚠️  This might take a few minutes for the first time (downloading ~2.2GB)


Some weights of the model checkpoint at FacebookAI/xlm-roberta-large were not used when initializing XLMRobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


✅ XLM-RoBERTa pipeline loaded successfully!

🇺🇸 English test: 'I think this account is a <mask>.'
  1. I think this account is a scam . (score: 0.6098)
  2. I think this account is a fake . (score: 0.2386)
  3. I think this account is a fraud . (score: 0.0285)
  4. I think this account is a troll . (score: 0.0239)
  5. I think this account is a hack . (score: 0.0065)

🇮🇩 Indonesian test: 'Saya pikir akun ini adalah <mask>.'
  1. Saya pikir akun ini adalah palsu . (score: 0.4199)
  2. Saya pikir akun ini adalah penipuan . (score: 0.1155)
  3. Saya pikir akun ini adalah scam . (score: 0.0412)
  4. Saya pikir akun ini adalah ilegal . (score: 0.0322)
  5. Saya pikir akun ini adalah salah . (score: 0.0210)

🇨🇳 Chinese test: '我认为这个账户是一个<mask>。'
  1. 我认为这个账户是一个问题 。 (score: 0.0870)
  2. 我认为这个账户是一个陷阱 。 (score: 0.0749)
  3. 我认为这个账户是一个错误 。 (score: 0.0522)
  4. 我认为这个账户是一个违法行为 。 (score: 0.0352)
  5. 我认为这个账户是一个漏洞 。 (score: 0.0241)

🇸🇦 Arabic test: 'أعتقد أن هذا الحساب هو <mask>.'
  1. أعتقد أن هذا ا

In [6]:
# Method 2: Direct model loading (Lower-level API)
print("🔧 Loading XLM-RoBERTa with direct model access...")

try:
    # Load tokenizer and model directly
    xlm_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large")
    xlm_model = AutoModelForMaskedLM.from_pretrained("FacebookAI/xlm-roberta-large")

    print("✅ XLM-RoBERTa tokenizer and model loaded successfully!")
    print(f"📊 Model config:")
    print(f"   - Vocab size: {xlm_tokenizer.vocab_size:,}")
    print(f"   - Max length: {xlm_tokenizer.model_max_length}")
    print(f"   - Model parameters: ~{sum(p.numel() for p in xlm_model.parameters()):,}")

    # Test direct usage
    def test_xlm_roberta_direct(text):
        print(f"\n🔍 Testing: '{text}'")

        # Tokenize
        inputs = xlm_tokenizer(text, return_tensors='pt')
        print(f"   📝 Tokenized: {xlm_tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])}")

        # Get embeddings/features
        with torch.no_grad():
            outputs = xlm_model(**inputs, output_hidden_states=True)
            # Get the hidden states (features)
            hidden_states = outputs.hidden_states[-1]  # Last layer hidden states

        print(f"   🧠 Hidden states shape: {hidden_states.shape}")
        print(f"   📊 Feature vector size per token: {hidden_states.shape[-1]}")

        return hidden_states

    # Test with different languages
    test_texts = [
        "Replace me by any text you'd like.",  # English
        "Ganti saya dengan teks apa pun yang Anda suka.",  # Indonesian
        "このテキストを好きなものに置き換えてください。"  # Japanese
    ]

    for text in test_texts:
        features = test_xlm_roberta_direct(text)

except Exception as e:
    print(f"❌ Error loading model directly: {e}")
    xlm_tokenizer = None
    xlm_model = None

🔧 Loading XLM-RoBERTa with direct model access...


Some weights of the model checkpoint at FacebookAI/xlm-roberta-large were not used when initializing XLMRobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


✅ XLM-RoBERTa tokenizer and model loaded successfully!
📊 Model config:
   - Vocab size: 250,002
   - Max length: 512
   - Model parameters: ~560,142,482

🔍 Testing: 'Replace me by any text you'd like.'
   📝 Tokenized: ['<s>', '▁Re', 'place', '▁me', '▁by', '▁any', '▁text', '▁you', "'", 'd', '▁like', '.', '</s>']
   🧠 Hidden states shape: torch.Size([1, 13, 1024])
   📊 Feature vector size per token: 1024

🔍 Testing: 'Ganti saya dengan teks apa pun yang Anda suka.'
   📝 Tokenized: ['<s>', '▁Gan', 'ti', '▁saya', '▁dengan', '▁teks', '▁apa', '▁pun', '▁yang', '▁Anda', '▁suka', '.', '</s>']
   🧠 Hidden states shape: torch.Size([1, 13, 1024])
   📊 Feature vector size per token: 1024

🔍 Testing: 'このテキストを好きなものに置き換えてください。'
   📝 Tokenized: ['<s>', '▁この', 'テキスト', 'を', '好きな', 'もの', 'に', '置き', '換え', 'てください', '。', '</s>']
   🧠 Hidden states shape: torch.Size([1, 12, 1024])
   📊 Feature vector size per token: 1024


# Spam Analyze

In [7]:
models_to_compare = [
    "plipustel/IndoBERT-spam-detector",
    "iqbalpurba26/indobert-ham-spam-detection",
    "kasyfilalbar/indo-spam-chatbot",
]

detailed_results = inspect_model_details(models_to_compare)

🔍 COMPREHENSIVE MODEL INSPECTION

📋 Inspecting: plipustel/IndoBERT-spam-detector
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: single_label_classification
   - Number of Labels: 5
   - Categories: ['LABEL_0', 'LABEL_1', 'LABEL_2', 'LABEL_3', 'LABEL_4']

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 30.5 thousand

   [Hub Info]
   - Downloads: 82
   - Likes: N/A
   - Last Modified: 2025-06-07

📋 Inspecting: iqbalpurba26/indobert-ham-spam-detection
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: None
   - Number of Labels: 2
   - Categories: ['LABEL_0', 'LABEL_1']

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 30.5 thousan

In [8]:
class IndoSpamChatbotAnalyzer:
    def __init__(self, model_name="kasyfilalbar/indo-spam-chatbot"):
        """Initialize Indo Spam Chatbot analyzer"""
        print("🔄 Loading Indo Spam Chatbot model...")

        try:
            # Load tokenizer and model
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                model_name, device_map="auto" if torch.cuda.is_available() else None
            )

            # Set device
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            if not torch.cuda.is_available():
                self.model = self.model.to(self.device)

            # Labels berdasarkan dokumentasi
            self.labels = {0: "Non-spam", 1: "Spam"}

            print("✅ Indo Spam Chatbot model loaded successfully!")
            print(f"🔧 Using device: {self.device}")
            print(f"🏷️ Labels: {self.labels}")

        except Exception as e:
            print(f"❌ Error loading model: {e}")
            raise e

    def predict_single(self, text):
        """Predict spam for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {"label": "Non-spam", "score": 0.0, "confidence": "low"}

        try:
            # Tokenize
            encoded_input = self.tokenizer(
                str(text), padding=True, truncation=True, return_tensors="pt"
            )

            # Move to device
            encoded_input = {k: v.to(self.device) for k, v in encoded_input.items()}

            # Predict
            with torch.no_grad():
                model_output = self.model(**encoded_input)
                logits = model_output.logits

                # Get probabilities
                probabilities = torch.softmax(logits, dim=1)
                predicted_label = torch.argmax(logits, dim=1).item()
                confidence_score = probabilities[0][predicted_label].item()

            # Map to label
            label = self.labels[predicted_label]

            # Determine confidence level
            if confidence_score >= 0.8:
                confidence = "high"
            elif confidence_score >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "label": label,
                "score": confidence_score,
                "confidence": confidence,
                "probabilities": {
                    "Non-spam": probabilities[0][0].item(),
                    "Spam": probabilities[0][1].item(),
                },
            }

        except Exception as e:
            print(f"Error predicting spam: {e}")
            return {"label": "Non-spam", "score": 0.0, "confidence": "error"}

    def predict_batch(self, texts, batch_size=32):
        """Predict spam for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for spam detection...")
        print(f"📊 Batch size: {batch_size}")

        # Process in batches
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing spam"):
            batch = texts[i : i + batch_size]

            # Clean batch texts
            clean_batch = []
            for text in batch:
                if pd.isna(text) or text is None or text == "" or text == "No Comment":
                    clean_batch.append("No Comment")
                else:
                    clean_batch.append(str(text))

            try:
                # Tokenize batch
                encoded_inputs = self.tokenizer(
                    clean_batch, padding=True, truncation=True, return_tensors="pt"
                )

                # Move to device
                encoded_inputs = {
                    k: v.to(self.device) for k, v in encoded_inputs.items()
                }

                # Predict batch
                with torch.no_grad():
                    model_outputs = self.model(**encoded_inputs)
                    logits = model_outputs.logits
                    probabilities = torch.softmax(logits, dim=1)
                    predicted_labels = torch.argmax(logits, dim=1)

                # Process results
                for j in range(len(batch)):
                    if clean_batch[j] == "No Comment":
                        results.append(
                            {"label": "Non-spam", "score": 0.0, "confidence": "low"}
                        )
                    else:
                        label_idx = predicted_labels[j].item()
                        confidence_score = probabilities[j][label_idx].item()
                        label = self.labels[label_idx]

                        # Determine confidence level
                        if confidence_score >= 0.8:
                            confidence = "high"
                        elif confidence_score >= 0.6:
                            confidence = "medium"
                        else:
                            confidence = "low"

                        results.append(
                            {
                                "label": label,
                                "score": confidence_score,
                                "confidence": confidence,
                            }
                        )

            except Exception as e:
                print(f"Error in spam batch {i//batch_size + 1}: {e}")
                # Add default predictions for failed batch
                for _ in range(len(batch)):
                    results.append(
                        {"label": "Non-spam", "score": 0.0, "confidence": "error"}
                    )

        return results

    def analyze_results(self, results):
        """Analyze and display spam detection results"""
        total = len(results)

        print(f"\n📊 SPAM DETECTION SUMMARY")
        print("=" * 50)
        print(f"📝 Total texts: {total}")

        # Spam distribution
        spam_counts = {"Spam": 0, "Non-spam": 0}
        for result in results:
            label = result["label"]
            spam_counts[label] = spam_counts.get(label, 0) + 1

        print(f"\n🚨 Spam Distribution:")
        for category, count in spam_counts.items():
            percentage = (count / total) * 100
            emoji = "🚨" if category == "Spam" else "✅"
            print(f"   {emoji} {category}: {count} ({percentage:.1f}%)")

        # Confidence distribution
        conf_counts = {"high": 0, "medium": 0, "low": 0, "error": 0}
        for result in results:
            conf_counts[result["confidence"]] += 1

        print(f"\n🎯 Confidence Distribution:")
        for conf, count in conf_counts.items():
            percentage = (count / total) * 100
            print(f"   {conf.capitalize()}: {count} ({percentage:.1f}%)")

        return {
            "total": total,
            "spam_distribution": spam_counts,
            "confidence_distribution": conf_counts,
        }


# Test Indo Spam Chatbot Analyzer
print("🚀 Testing Indo Spam Chatbot Analyzer...")

try:
    # Initialize analyzer
    spam_analyzer = IndoSpamChatbotAnalyzer()

    # Test dengan contoh dari dokumentasi + tambahan
    test_messages = [
        ("adsfwcasdfad", "Expected: Spam (gibberish)"),
        (
            "kak bisa depo di link ini: http://dewa.site/dewa/dewi",
            "Expected: Spam (suspicious link)",
        ),
        ("p", "Expected: Spam (single character)"),
        ("1234", "Expected: Spam (numbers only)"),
        (
            "Halo, bagaimana kabar Anda hari ini?",
            "Expected: Non-spam (normal conversation)",
        ),
        ("Terima kasih atas informasinya", "Expected: Non-spam (polite message)"),
        ("KLIK DISINI MENANG 100 JUTA!!!", "Expected: Spam (promotional spam)"),
        (
            "Selamat pagi, apakah bisa bantu saya?",
            "Expected: Non-spam (normal inquiry)",
        ),
        ("wwwwwwwwwwwwwww", "Expected: Spam (repeated characters)"),
        ("Mohon maaf mengganggu waktu Anda", "Expected: Non-spam (polite message)"),
    ]

    print(f"\n🧪 Testing individual spam predictions:")
    print("-" * 70)

    for message, expected in test_messages:
        result = spam_analyzer.predict_single(message)
        spam_emoji = "🚨" if result["label"] == "Spam" else "✅"

        print(f"Message: '{message}'")
        print(f"   {expected}")
        print(
            f"   {spam_emoji} Result: {result['label']} ({result['score']:.3f}) - {result['confidence']}"
        )
        if "probabilities" in result:
            print(
                f"   📊 Probabilities: Non-spam: {result['probabilities']['Non-spam']:.3f}, Spam: {result['probabilities']['Spam']:.3f}"
            )
        print()

    print("✅ Indo Spam Chatbot Analyzer test completed!")

except Exception as e:
    print(f"❌ Error in Indo Spam Chatbot Analyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing Indo Spam Chatbot Analyzer...
🔄 Loading Indo Spam Chatbot model...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 38.66it/s]
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


✅ Indo Spam Chatbot model loaded successfully!
🔧 Using device: cpu
🏷️ Labels: {0: 'Non-spam', 1: 'Spam'}

🧪 Testing individual spam predictions:
----------------------------------------------------------------------
Message: 'adsfwcasdfad'
   Expected: Spam (gibberish)
   🚨 Result: Spam (0.998) - high
   📊 Probabilities: Non-spam: 0.002, Spam: 0.998

Message: 'kak bisa depo di link ini: http://dewa.site/dewa/dewi'
   Expected: Spam (suspicious link)
   🚨 Result: Spam (0.996) - high
   📊 Probabilities: Non-spam: 0.004, Spam: 0.996

Message: 'p'
   Expected: Spam (single character)
   🚨 Result: Spam (0.999) - high
   📊 Probabilities: Non-spam: 0.001, Spam: 0.999

Message: '1234'
   Expected: Spam (numbers only)
   🚨 Result: Spam (0.998) - high
   📊 Probabilities: Non-spam: 0.002, Spam: 0.998

Message: 'Halo, bagaimana kabar Anda hari ini?'
   Expected: Non-spam (normal conversation)
   ✅ Result: Non-spam (0.985) - high
   📊 Probabilities: Non-spam: 0.985, Spam: 0.015

Message: 'Terima ka