# Model Inspect

In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import torch
from tqdm import tqdm
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from huggingface_hub import HfApi
import humanize


def inspect_model_details(model_names):
    """
    Inspect multiple models for a comprehensive comparison, including
    architecture, tokenizer, and Hub metadata.
    """
    print("🔍 COMPREHENSIVE MODEL INSPECTION")
    print("=" * 80)

    # Inisialisasi API untuk mengambil data dari Hugging Face Hub
    hf_api = HfApi()
    results = {}

    for model_name in model_names:
        print(f"\n📋 Inspecting: {model_name}")
        print("-" * 60)

        try:
            # 1. Inspeksi Konfigurasi
            config = AutoConfig.from_pretrained(model_name)
            info = {
                "model_type": config.model_type,
                # Detail Arsitektur
                "hidden_size": getattr(config, "hidden_size", "N/A"),
                "num_layers": getattr(config, "num_hidden_layers", "N/A"),
                "num_heads": getattr(config, "num_attention_heads", "N/A"),
                "num_parameters": (
                    humanize.intword(config.num_parameters())
                    if hasattr(config, "num_parameters")
                    and callable(config.num_parameters)
                    else "N/A"
                ),
                # Detail Klasifikasi
                "num_labels": config.num_labels,
                "labels": dict(config.id2label) if hasattr(config, "id2label") else {},
                "problem_type": getattr(config, "problem_type", "Not specified"),
            }

            print("   [Architecture]")
            print(f"   - Model Type: {info['model_type']}")
            print(f"   - Parameters: {info['num_parameters']}")
            print(
                f"   - Layers: {info['num_layers']}, Hidden Size: {info['hidden_size']}, Heads: {info['num_heads']}"
            )

            print("\n   [Classification Task]")
            print(f"   - Problem Type: {info['problem_type']}")
            print(f"   - Number of Labels: {info['num_labels']}")
            if info["labels"]:
                print(f"   - Categories: {list(info['labels'].values())}")

            # 2. Inspeksi Tokenizer
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                info["tokenizer_class"] = tokenizer.__class__.__name__
                info["vocab_size"] = humanize.intword(tokenizer.vocab_size)

                print("\n   [Tokenizer]")
                print(f"   - Class: {info['tokenizer_class']}")
                print(f"   - Vocabulary Size: {info['vocab_size']}")
            except Exception as tokenizer_error:
                print(f"\n   [Tokenizer]")
                print(f"   - ❌ Error loading tokenizer: {tokenizer_error}")
                info["tokenizer_error"] = str(tokenizer_error)

            # 3. Inspeksi Metadata dari Hugging Face Hub (with better error handling)
            try:
                model_info_hub = hf_api.model_info(model_name)

                # Safe access to attributes
                downloads = getattr(model_info_hub, "downloads", 0)
                likes = getattr(model_info_hub, "likes", 0)
                last_modified = getattr(model_info_hub, "lastModified", None)

                info["downloads"] = humanize.intword(downloads) if downloads else "N/A"
                info["likes"] = humanize.intword(likes) if likes else "N/A"

                # Safe date formatting
                if last_modified:
                    if hasattr(last_modified, "strftime"):
                        info["last_modified"] = last_modified.strftime("%Y-%m-%d")
                    else:
                        info["last_modified"] = str(last_modified).split("T")[0]
                else:
                    info["last_modified"] = "N/A"

                print("\n   [Hub Info]")
                print(f"   - Downloads: {info['downloads']}")
                print(f"   - Likes: {info['likes']}")
                print(f"   - Last Modified: {info['last_modified']}")

            except Exception as hub_error:
                print(f"\n   [Hub Info]")
                print(f"   - ❌ Error accessing Hub info: {hub_error}")
                info["hub_error"] = str(hub_error)
                info["downloads"] = "N/A"
                info["likes"] = "N/A"
                info["last_modified"] = "N/A"

            results[model_name] = info

        except Exception as e:
            print(f"   ❌ Error processing {model_name}: {e}")
            results[model_name] = {"error": str(e)}

    return results

# Sentiment Analyze

In [3]:
models_to_compare = [
    "Aardiiiiy/indobertweet-base-Indonesian-sentiment-analysis",
]

detailed_results = inspect_model_details(models_to_compare)

🔍 COMPREHENSIVE MODEL INSPECTION

📋 Inspecting: Aardiiiiy/indobertweet-base-Indonesian-sentiment-analysis
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: single_label_classification
   - Number of Labels: 3
   - Categories: ['Negative', 'Neutral', 'Positive']
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: single_label_classification
   - Number of Labels: 3
   - Categories: ['Negative', 'Neutral', 'Positive']

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 31.9 thousand

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 31.9 thousand

   [Hub Info]
   - Downloads: 3.1 thousand
   - Likes: 6
   - Last Modified: 2025-05-23

   [Hub Info]
   - Downloads: 3.1 thousand
   - Likes: 6
   - Las

In [4]:
class SentimentAnalyzer:
    def __init__(
        self, model_name="Aardiiiiy/indobertweet-base-Indonesian-sentiment-analysis"
    ):
        """Initialize sentiment analyzer with IndoBERTweet model"""
        print("🔄 Loading IndoBERTweet sentiment model...")

        # Using pipeline (simplest approach)
        self.pipe = pipeline(
            "text-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1,
        )

        # Load tokenizer and model separately for more control if needed
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Based on our inspection - we know exactly what labels exist
        self.sentiment_labels = ["NEGATIVE", "NEUTRAL", "POSITIVE"]

        print("✅ Sentiment model loaded successfully!")
        print(f"🔧 Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
        print(f"🏷️ Labels: {', '.join(self.sentiment_labels)}")

    def predict_single(self, text):
        """Predict sentiment for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {"label": "NEUTRAL", "score": 0.0, "confidence": "low"}

        try:
            result = self.pipe(str(text))
            prediction = result[0]

            # Add confidence level based on score
            if prediction["score"] >= 0.8:
                confidence = "high"
            elif prediction["score"] >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "label": prediction["label"],
                "score": prediction["score"],
                "confidence": confidence,
            }

        except Exception as e:
            print(f"Error predicting sentiment: {e}")
            return {"label": "NEUTRAL", "score": 0.0, "confidence": "error"}

    def predict_batch(self, texts, batch_size=32):
        """Predict sentiment for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for sentiment analysis...")
        print(f"📊 Batch size: {batch_size}")

        # Process in batches with progress bar
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing sentiment"):
            batch = texts[i : i + batch_size]

            # Clean batch texts
            clean_batch = []
            for text in batch:
                if pd.isna(text) or text is None or text == "" or text == "No Comment":
                    clean_batch.append("No Comment")
                else:
                    clean_batch.append(str(text))

            try:
                # Predict batch
                batch_results = self.pipe(clean_batch)

                # Process results
                for j, result in enumerate(batch_results):
                    if clean_batch[j] == "No Comment":
                        results.append(
                            {"label": "NEUTRAL", "score": 0.0, "confidence": "low"}
                        )
                    else:
                        # Add confidence level
                        if result["score"] >= 0.8:
                            confidence = "high"
                        elif result["score"] >= 0.6:
                            confidence = "medium"
                        else:
                            confidence = "low"

                        results.append(
                            {
                                "label": result["label"],
                                "score": result["score"],
                                "confidence": confidence,
                            }
                        )

            except Exception as e:
                print(f"Error in sentiment batch {i//batch_size + 1}: {e}")
                # Add neutral predictions for failed batch
                for _ in range(len(batch)):
                    results.append(
                        {"label": "NEUTRAL", "score": 0.0, "confidence": "error"}
                    )

        return results

    def analyze_results(self, results):
        """Analyze and display sentiment analysis results"""
        total = len(results)

        print(f"\n📊 SENTIMENT ANALYSIS SUMMARY")
        print("=" * 50)
        print(f"📝 Total texts: {total}")

        # Sentiment distribution
        sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
        for result in results:
            label = result["label"]
            sentiment_counts[label] = sentiment_counts.get(label, 0) + 1

        print(f"\n💭 Sentiment Distribution:")
        for sentiment, count in sentiment_counts.items():
            percentage = (count / total) * 100
            emoji = (
                "😊"
                if sentiment == "POSITIVE"
                else "😞" if sentiment == "NEGATIVE" else "😐"
            )
            print(f"   {emoji} {sentiment}: {count} ({percentage:.1f}%)")

        # Confidence distribution
        conf_counts = {"high": 0, "medium": 0, "low": 0, "error": 0}
        for result in results:
            conf_counts[result["confidence"]] += 1

        print(f"\n🎯 Confidence Distribution:")
        for conf, count in conf_counts.items():
            percentage = (count / total) * 100
            print(f"   {conf.capitalize()}: {count} ({percentage:.1f}%)")

        return {
            "total": total,
            "sentiment_distribution": sentiment_counts,
            "confidence_distribution": conf_counts,
        }

In [5]:
# Test SentimentAnalyzer only

print("🚀 Testing SentimentAnalyzer...")

# Initialize
try:
    sentiment_analyzer = SentimentAnalyzer()

    # Test with some examples
    test_texts = [
        "Gimana sih @layananinternet, sinyalnya ilang-ilangan mulu dari pagi di daerah Bekasi. Mana lagi butuh buat kerjaan #internetdown",
        "Sumpah, vibe-nya cozy abis buat nugas. Kopinya juga aje gile mantep. Fix bakal jadi langganan! ✨",
        "Overall experience-nya lumayan sih, cuman servicenya agak lama. Nunggunya ampe stengah jam sndiri.",
        "Desain HP-nya keren, kameranya juga oke. TAPI KENAPA BATERAINYA BOROS BANGET?! Baru setengah hari udah abis 😭",
        "Ada yg tau info konser terbaru bulan ini gaes?",
    ]

    print(f"\n🧪 Testing individual sentiment predictions:")
    print("-" * 60)

    for text in test_texts:
        result = sentiment_analyzer.predict_single(text)
        sentiment_emoji = (
            "😊"
            if result["label"] == "POSITIVE"
            else "😞" if result["label"] == "NEGATIVE" else "😐"
        )
        print(f"Text: '{text}'")
        print(
            f"   💭 Sentiment: {sentiment_emoji} {result['label']} ({result['score']:.3f}) - {result['confidence']}"
        )
        print()

    print("✅ SentimentAnalyzer test completed!")

except Exception as e:
    print(f"❌ Error in SentimentAnalyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing SentimentAnalyzer...
🔄 Loading IndoBERTweet sentiment model...


Device set to use cpu


✅ Sentiment model loaded successfully!
🔧 Using device: CPU
🏷️ Labels: NEGATIVE, NEUTRAL, POSITIVE

🧪 Testing individual sentiment predictions:
------------------------------------------------------------
Text: 'Gimana sih @layananinternet, sinyalnya ilang-ilangan mulu dari pagi di daerah Bekasi. Mana lagi butuh buat kerjaan #internetdown'
   💭 Sentiment: 😐 Negative (0.970) - high

Text: 'Sumpah, vibe-nya cozy abis buat nugas. Kopinya juga aje gile mantep. Fix bakal jadi langganan! ✨'
   💭 Sentiment: 😐 Positive (0.997) - high

Text: 'Overall experience-nya lumayan sih, cuman servicenya agak lama. Nunggunya ampe stengah jam sndiri.'
   💭 Sentiment: 😐 Neutral (0.995) - high

Text: 'Desain HP-nya keren, kameranya juga oke. TAPI KENAPA BATERAINYA BOROS BANGET?! Baru setengah hari udah abis 😭'
   💭 Sentiment: 😐 Neutral (0.841) - high

Text: 'Ada yg tau info konser terbaru bulan ini gaes?'
   💭 Sentiment: 😐 Neutral (0.998) - high

✅ SentimentAnalyzer test completed!
Text: 'Gimana sih @layanan

# Emotion Analyze

In [6]:
models_to_compare = [
    "Aardiiiiy/EmoSense-ID-Indonesian-Emotion-Classifier",
]

detailed_results = inspect_model_details(models_to_compare)

🔍 COMPREHENSIVE MODEL INSPECTION

📋 Inspecting: Aardiiiiy/EmoSense-ID-Indonesian-Emotion-Classifier
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: single_label_classification
   - Number of Labels: 8
   - Categories: ['Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: single_label_classification
   - Number of Labels: 8
   - Categories: ['Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 30.5 thousand

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 30.5 thousand

   [Hub Info]
   - Downloads: 2.0 thousand
   - Likes: 2
   - La

In [7]:
class EmotionAnalyzer:
    def __init__(
        self, model_name="Aardiiiiy/EmoSense-ID-Indonesian-Emotion-Classifier"
    ):
        """Initialize emotion analyzer with EmoSense model"""
        print("🔄 Loading EmoSense Indonesian emotion model...")

        # Using pipeline (simplest approach)
        self.pipe = pipeline(
            "text-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1,
        )

        # Load tokenizer and model separately for more control if needed
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Based on our inspection - we know exactly what labels exist (Plutchik's 8 emotions)
        self.emotion_labels = [
            "Anger",
            "Anticipation",
            "Disgust",
            "Fear",
            "Joy",
            "Sadness",
            "Surprise",
            "Trust",
        ]

        # Emotion emojis for better display
        self.emotion_emojis = {
            "Anger": "😡",
            "Anticipation": "🤔",
            "Disgust": "🤢",
            "Fear": "😨",
            "Joy": "😊",
            "Sadness": "😢",
            "Surprise": "😲",
            "Trust": "🤝",
        }

        print("✅ Emotion model loaded successfully!")
        print(f"🔧 Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
        print(f"🎭 Emotions: {', '.join(self.emotion_labels)}")

    def predict_single(self, text):
        """Predict emotion for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {"label": "Trust", "score": 0.0, "confidence": "low"}

        try:
            result = self.pipe(str(text))
            prediction = result[0]

            # Add confidence level based on score
            if prediction["score"] >= 0.8:
                confidence = "high"
            elif prediction["score"] >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "label": prediction["label"],
                "score": prediction["score"],
                "confidence": confidence,
            }

        except Exception as e:
            print(f"Error predicting emotion: {e}")
            return {"label": "Trust", "score": 0.0, "confidence": "error"}

    def predict_batch(self, texts, batch_size=32):
        """Predict emotion for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for emotion analysis...")
        print(f"📊 Batch size: {batch_size}")

        # Process in batches with progress bar
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing emotions"):
            batch = texts[i : i + batch_size]

            # Clean batch texts
            clean_batch = []
            for text in batch:
                if pd.isna(text) or text is None or text == "" or text == "No Comment":
                    clean_batch.append("No Comment")
                else:
                    clean_batch.append(str(text))

            try:
                # Predict batch
                batch_results = self.pipe(clean_batch)

                # Process results
                for j, result in enumerate(batch_results):
                    if clean_batch[j] == "No Comment":
                        results.append(
                            {"label": "Trust", "score": 0.0, "confidence": "low"}
                        )
                    else:
                        # Add confidence level
                        if result["score"] >= 0.8:
                            confidence = "high"
                        elif result["score"] >= 0.6:
                            confidence = "medium"
                        else:
                            confidence = "low"

                        results.append(
                            {
                                "label": result["label"],
                                "score": result["score"],
                                "confidence": confidence,
                            }
                        )

            except Exception as e:
                print(f"Error in emotion batch {i//batch_size + 1}: {e}")
                # Add default predictions for failed batch
                for _ in range(len(batch)):
                    results.append(
                        {"label": "Trust", "score": 0.0, "confidence": "error"}
                    )

        return results

    def analyze_results(self, results):
        """Analyze and display emotion analysis results"""
        total = len(results)

        print(f"\n📊 EMOTION ANALYSIS SUMMARY")
        print("=" * 50)
        print(f"📝 Total texts: {total}")

        # Emotion distribution
        emotion_counts = {}
        for emotion in self.emotion_labels:
            emotion_counts[emotion] = 0

        for result in results:
            label = result["label"]
            emotion_counts[label] = emotion_counts.get(label, 0) + 1

        print(f"\n🎭 Emotion Distribution:")
        # Sort by count, descending
        sorted_emotions = sorted(
            emotion_counts.items(), key=lambda x: x[1], reverse=True
        )
        for emotion, count in sorted_emotions:
            percentage = (count / total) * 100
            emoji = self.emotion_emojis.get(emotion, "🎭")
            print(f"   {emoji} {emotion}: {count} ({percentage:.1f}%)")

        # Confidence distribution
        conf_counts = {"high": 0, "medium": 0, "low": 0, "error": 0}
        for result in results:
            conf_counts[result["confidence"]] += 1

        print(f"\n🎯 Confidence Distribution:")
        for conf, count in conf_counts.items():
            percentage = (count / total) * 100
            print(f"   {conf.capitalize()}: {count} ({percentage:.1f}%)")

        return {
            "total": total,
            "emotion_distribution": emotion_counts,
            "confidence_distribution": conf_counts,
        }

In [8]:
# Test EmotionAnalyzer only

print("🚀 Testing EmotionAnalyzer...")

# Initialize
try:
    emotion_analyzer = EmotionAnalyzer()

    # Test with emotion-specific examples
    emotion_test_texts = [
        ("Saya sangat marah dengan pelayanan ini!", "Expected: Anger"),
        ("Wah senang sekali dapat hadiah ini!", "Expected: Joy"),
        ("Saya merasa sedih sekali hari ini", "Expected: Sadness"),
        ("Ngeri banget nonton film horror tadi", "Expected: Fear"),
        ("Kaget banget ternyata dia datang!", "Expected: Surprise"),
        ("Jijik banget lihat yang begitu", "Expected: Disgust"),
        ("Saya percaya sepenuhnya dengan tim ini", "Expected: Trust"),
        ("Tidak sabar menunggu acara besok!", "Expected: Anticipation"),
    ]

    print(f"\n🧪 Testing individual emotion predictions:")
    print("-" * 60)

    for text, expected in emotion_test_texts:
        result = emotion_analyzer.predict_single(text)
        emotion_emoji = emotion_analyzer.emotion_emojis.get(result["label"], "🎭")
        print(f"Text: '{text}'")
        print(f"   {expected}")
        print(
            f"   🎭 Emotion: {emotion_emoji} {result['label']} ({result['score']:.3f}) - {result['confidence']}"
        )
        print()

    print("✅ EmotionAnalyzer test completed!")

except Exception as e:
    print(f"❌ Error in EmotionAnalyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing EmotionAnalyzer...
🔄 Loading EmoSense Indonesian emotion model...


Device set to use cpu


✅ Emotion model loaded successfully!
🔧 Using device: CPU
🎭 Emotions: Anger, Anticipation, Disgust, Fear, Joy, Sadness, Surprise, Trust

🧪 Testing individual emotion predictions:
------------------------------------------------------------
Text: 'Saya sangat marah dengan pelayanan ini!'
   Expected: Anger
   🎭 Emotion: 😡 Anger (0.981) - high

Text: 'Wah senang sekali dapat hadiah ini!'
   Expected: Joy
   🎭 Emotion: 😊 Joy (0.992) - high

Text: 'Saya merasa sedih sekali hari ini'
   Expected: Sadness
   🎭 Emotion: 😢 Sadness (0.993) - high

Text: 'Ngeri banget nonton film horror tadi'
   Expected: Fear
   🎭 Emotion: 😨 Fear (0.979) - high

Text: 'Kaget banget ternyata dia datang!'
   Expected: Surprise
   🎭 Emotion: 😲 Surprise (0.994) - high

Text: 'Saya sangat marah dengan pelayanan ini!'
   Expected: Anger
   🎭 Emotion: 😡 Anger (0.981) - high

Text: 'Wah senang sekali dapat hadiah ini!'
   Expected: Joy
   🎭 Emotion: 😊 Joy (0.992) - high

Text: 'Saya merasa sedih sekali hari ini'
   Expe

# Hate speech Analyze

In [9]:
models_to_compare = [
    "PaceKW/bert-multilabel-indonesian-hate-speech",
    "PaceKW/indobert-base-p1-multilabel-indonesian-hate-speech-new"
]

detailed_results = inspect_model_details(models_to_compare)

🔍 COMPREHENSIVE MODEL INSPECTION

📋 Inspecting: PaceKW/bert-multilabel-indonesian-hate-speech
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: multi_label_classification
   - Number of Labels: 12
   - Categories: ['HS', 'Abusive', 'HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race', 'HS_Physical', 'HS_Gender', 'HS_Other', 'HS_Weak', 'HS_Moderate', 'HS_Strong']
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: multi_label_classification
   - Number of Labels: 12
   - Categories: ['HS', 'Abusive', 'HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race', 'HS_Physical', 'HS_Gender', 'HS_Other', 'HS_Weak', 'HS_Moderate', 'HS_Strong']

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 31.9 thousand

   [Tokenizer]
 

In [10]:
class HateSpeechAnalyzer:

    def __init__(
        self,
        model_name="PaceKW/distilbert-base-multilingual-cased-multilabel-indonesian-hate-speech",
    ):
        """Initialize hate speech analyzer"""
        print("🔄 Loading Indonesian Hate Speech model...")

        # Using pipeline (simplest approach)
        self.pipe = pipeline(
            "text-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1,
            return_all_scores=True,  # Important for multilabel
        )

        # Load tokenizer and model separately for more control if needed
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Based on our inspection - we know exactly what labels exist
        self.hate_categories = [
            "HS",
            "Abusive",
            "HS_Individual",
            "HS_Group",
            "HS_Religion",
            "HS_Race",
            "HS_Physical",
            "HS_Gender",
            "HS_Other",
            "HS_Weak",
            "HS_Moderate",
            "HS_Strong",
        ]

        print("✅ Hate speech model loaded successfully!")
        print(f"🔧 Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
        print(f"🏷️ Categories: {', '.join(self.hate_categories)}")

    def predict_single(self, text, threshold=0.5):
        """Predict hate speech for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {
                "is_hate_speech": False,
                "categories": [],
                "scores": {},
                "max_score": 0.0,
                "confidence": "low",
            }

        try:
            # Get predictions for all labels
            results = self.pipe(str(text))

            # Debug: Print hasil untuk lihat struktur
            print(f"Debug - Raw result type: {type(results)}")
            print(f"Debug - Raw result: {results}")

            # Handle different output formats
            if isinstance(results, list):
                # Jika hasil adalah list of lists (nested)
                if len(results) > 0 and isinstance(results[0], list):
                    predictions = results[0]  # Ambil list pertama
                else:
                    predictions = results  # Sudah format yang benar
            else:
                predictions = [results]  # Bungkus dalam list jika bukan list

            # Process multilabel results
            active_categories = []
            all_scores = {}
            max_score = 0.0

            for prediction in predictions:
                # Handle different key formats
                if isinstance(prediction, dict):
                    if "label" in prediction and "score" in prediction:
                        label = prediction["label"]
                        score = prediction["score"]
                    elif "LABEL" in prediction and "SCORE" in prediction:
                        label = prediction["LABEL"]
                        score = prediction["SCORE"]
                    else:
                        print(f"Debug - Unknown prediction format: {prediction}")
                        continue
                else:
                    print(f"Debug - Unexpected prediction type: {type(prediction)}")
                    continue

                all_scores[label] = score
                max_score = max(max_score, score)

                # Add to active categories if above threshold
                if score >= threshold:
                    active_categories.append(label)

            # Determine if hate speech detected
            is_hate_speech = len(active_categories) > 0

            # Confidence based on max score
            if max_score >= 0.8:
                confidence = "high"
            elif max_score >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "is_hate_speech": is_hate_speech,
                "categories": active_categories,
                "scores": all_scores,
                "max_score": max_score,
                "confidence": confidence,
            }

        except Exception as e:
            print(f"Error predicting hate speech: {e}")
            import traceback

            traceback.print_exc()
            return {
                "is_hate_speech": False,
                "categories": [],
                "scores": {},
                "max_score": 0.0,
                "confidence": "error",
            }

    # ... rest of the methods remain the same ...
    def predict_batch(self, texts, batch_size=16, threshold=0.5):
        """Predict hate speech for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for hate speech analysis...")
        print(f"📊 Threshold: {threshold} | Batch size: {batch_size}")

        # Process in batches
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing hate speech"):
            batch = texts[i : i + batch_size]

            # Process each text in batch
            for text in batch:
                result = self.predict_single(text, threshold)
                results.append(result)

        return results

In [11]:
# Test HateSpeechAnalyzer only - FIXED VERSION

print("🚀 Testing HateSpeechAnalyzer...")

# Initialize
try:
    # Buat versi yang lebih simple untuk testing
    hate_analyzer = HateSpeechAnalyzer()

    # Test with hate speech examples
    hate_test_texts = [
        ("Selamat pagi semua!", "Expected: Clean"),
        ("Terima kasih atas bantuannya", "Expected: Clean"),
        ("Dasar bodoh tidak tahu apa-apa", "Expected: Abusive/HS_Individual"),
        ("Agama kalian sesat semua", "Expected: HS_Religion/HS_Group"),
        ("Perempuan memang inferior", "Expected: HS_Gender"),
        ("Orang ras itu memang jelek", "Expected: HS_Race"),
        ("Bunuh saja dia", "Expected: HS_Strong"),
        ("Agak aneh sih orangnya", "Expected: HS_Weak"),
    ]

    print(f"\n🧪 Testing individual hate speech predictions:")
    print("-" * 60)

    for text, expected in hate_test_texts:
        # Hapus debug print untuk testing yang clean
        result = hate_analyzer.predict_single(text, threshold=0.5)

        print(f"Text: '{text}'")
        print(f"   {expected}")
        hate_status = "🚨 YES" if result["is_hate_speech"] else "✅ NO"
        print(
            f"   🚨 Hate Speech: {hate_status} ({result['max_score']:.3f}) - {result['confidence']}"
        )
        if result["categories"]:
            print(f"   🏷️ Categories: {', '.join(result['categories'])}")
        print()

    print("✅ HateSpeechAnalyzer test completed!")

except Exception as e:
    print(f"❌ Error in HateSpeechAnalyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing HateSpeechAnalyzer...
🔄 Loading Indonesian Hate Speech model...


Device set to use cpu


✅ Hate speech model loaded successfully!
🔧 Using device: CPU
🏷️ Categories: HS, Abusive, HS_Individual, HS_Group, HS_Religion, HS_Race, HS_Physical, HS_Gender, HS_Other, HS_Weak, HS_Moderate, HS_Strong

🧪 Testing individual hate speech predictions:
------------------------------------------------------------
Debug - Raw result type: <class 'list'>
Debug - Raw result: [[{'label': 'HS', 'score': 0.015548990108072758}, {'label': 'Abusive', 'score': 0.0038112031761556864}, {'label': 'HS_Individual', 'score': 0.012012024410068989}, {'label': 'HS_Group', 'score': 0.004762946628034115}, {'label': 'HS_Religion', 'score': 0.0032223816961050034}, {'label': 'HS_Race', 'score': 0.004168652463704348}, {'label': 'HS_Physical', 'score': 0.00048205480561591685}, {'label': 'HS_Gender', 'score': 0.000542783469427377}, {'label': 'HS_Other', 'score': 0.008541245013475418}, {'label': 'HS_Weak', 'score': 0.00872738379985094}, {'label': 'HS_Moderate', 'score': 0.0030228530522435904}, {'label': 'HS_Strong', '

# BERT Model Test

In [12]:
# 1. Inisialisasi pipeline "fill-mask"
# Ini akan mengunduh model jika belum ada di cache
print("Mengunduh model (jika diperlukan)...")
tebak_kata = pipeline("fill-mask", model="cahya/bert-base-indonesian-1.5G")
print("Model siap digunakan.")

# 2. Siapkan beberapa kalimat tes
kalimat1 = "Ibu kota negara Indonesia adalah [MASK]."
kalimat2 = "Orang yang bekerja di rumah sakit biasanya adalah seorang [MASK]."
kalimat3 = "Setelah lelah bekerja seharian, enaknya minum [MASK] dingin."
kalimat4 = "Dia membeli mobil baru berwarna [MASK]."

# 3. Lakukan prediksi dan lihat hasilnya
print(f"\n--- Tes untuk: '{kalimat1}' ---")
hasil1 = tebak_kata(kalimat1)
for prediksi in hasil1:
    print(
        f"Kata: {prediksi['token_str']:<15} | Skor Keyakinan: {prediksi['score']:.4f}"
    )

print(f"\n--- Tes untuk: '{kalimat2}' ---")
hasil2 = tebak_kata(kalimat2, top_k=3)  # Minta 3 tebakan teratas
for prediksi in hasil2:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

print(f"\n--- Tes untuk: '{kalimat3}' ---")
hasil3 = tebak_kata(kalimat3, top_k=3)
for prediksi in hasil3:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

print(f"\n--- Tes untuk: '{kalimat4}' ---")
hasil4 = tebak_kata(kalimat4, top_k=3)
for prediksi in hasil4:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

Mengunduh model (jika diperlukan)...


Some weights of the model checkpoint at cahya/bert-base-indonesian-1.5G were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0
Device set to use mps:0


Model siap digunakan.

--- Tes untuk: 'Ibu kota negara Indonesia adalah [MASK].' ---
Kata: jakarta         | Skor Keyakinan: 0.5408
Kata: yogyakarta      | Skor Keyakinan: 0.0404
Kata: pontianak       | Skor Keyakinan: 0.0294
Kata: makassar        | Skor Keyakinan: 0.0170
Kata: merauke         | Skor Keyakinan: 0.0158

--- Tes untuk: 'Orang yang bekerja di rumah sakit biasanya adalah seorang [MASK].' ---
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang dokter.
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang perawat.
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang bidan.

--- Tes untuk: 'Setelah lelah bekerja seharian, enaknya minum [MASK] dingin.' ---
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum air dingin.
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum minuman dingin.
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum teh dingin.

--- Tes untuk: 'Dia membeli mo

# 🌍 XLM-RoBERTa Model Testing

Testing Facebook's XLM-RoBERTa (Cross-lingual RoBERTa) model which supports 100 languages including Indonesian. This model can be useful for:
- Text classification tasks in multiple languages
- Feature extraction for bot detection
- Cross-lingual text analysis
- Masked language modeling

In [13]:
# Install required packages for XLM-RoBERTa
import subprocess
import sys

def install_package(package):
    try:
        __import__(package)
        print(f"✅ {package} already installed")
    except ImportError:
        print(f"📦 Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"✅ {package} installed successfully")

# Install transformers if not already installed
install_package("transformers")
install_package("torch")

print("\n🎉 All packages ready for XLM-RoBERTa testing!")

✅ transformers already installed
✅ torch already installed

🎉 All packages ready for XLM-RoBERTa testing!


In [14]:
# Basic XLM-RoBERTa Model Testing
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM
import torch

print("🚀 Loading XLM-RoBERTa Model...")
print("⚠️  This might take a few minutes for the first time (downloading ~2.2GB)")

# Method 1: Using pipeline (High-level API)
try:
    # Load the fill-mask pipeline
    xlm_roberta_pipe = pipeline(
        "fill-mask",
        model="FacebookAI/xlm-roberta-large",
        tokenizer="FacebookAI/xlm-roberta-large"
    )
    print("✅ XLM-RoBERTa pipeline loaded successfully!")

    # Test with English
    english_test = "I think this account is a <mask>."
    print(f"\n🇺🇸 English test: '{english_test}'")
    english_results = xlm_roberta_pipe(english_test, top_k=5)

    for i, result in enumerate(english_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with Indonesian
    indonesian_test = "Saya pikir akun ini adalah <mask>."
    print(f"\n🇮🇩 Indonesian test: '{indonesian_test}'")
    indonesian_results = xlm_roberta_pipe(indonesian_test, top_k=5)

    for i, result in enumerate(indonesian_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with china
    chinese_test = "我认为这个账户是一个<mask>。"
    print(f"\n🇨🇳 Chinese test: '{chinese_test}'")
    chinese_results = xlm_roberta_pipe(chinese_test, top_k=5)

    for i, result in enumerate(chinese_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with arabic
    arabic_test = "أعتقد أن هذا الحساب هو <mask>."
    print(f"\n🇸🇦 Arabic test: '{arabic_test}'")
    arabic_results = xlm_roberta_pipe(arabic_test, top_k=5)

    for i, result in enumerate(arabic_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with japanese
    japanese_test = "このアカウントは<mask>だと思います。"
    print(f"\n🇯🇵 Japanese test: '{japanese_test}'")
    japanese_results = xlm_roberta_pipe(japanese_test, top_k=5)

    for i, result in enumerate(japanese_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with korean
    korean_test = "이 계정은 <mask>라고 생각합니다."
    print(f"\n🇰🇷 Korean test: '{korean_test}'")
    korean_results = xlm_roberta_pipe(korean_test, top_k=5)

    for i, result in enumerate(korean_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

    # Test with spanish
    spanish_test = "Creo que esta cuenta es un <mask>."
    print(f"\n🇪🇸 Spanish test: '{spanish_test}'")
    spanish_results = xlm_roberta_pipe(spanish_test, top_k=5)

    for i, result in enumerate(spanish_results, 1):
        print(f"  {i}. {result['sequence']} (score: {result['score']:.4f})")

except Exception as e:
    print(f"❌ Error loading pipeline: {e}")
    xlm_roberta_pipe = None

🚀 Loading XLM-RoBERTa Model...
⚠️  This might take a few minutes for the first time (downloading ~2.2GB)


Some weights of the model checkpoint at FacebookAI/xlm-roberta-large were not used when initializing XLMRobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0
Device set to use mps:0


✅ XLM-RoBERTa pipeline loaded successfully!

🇺🇸 English test: 'I think this account is a <mask>.'
  1. I think this account is a scam . (score: 0.6098)
  2. I think this account is a fake . (score: 0.2386)
  3. I think this account is a fraud . (score: 0.0285)
  4. I think this account is a troll . (score: 0.0239)
  5. I think this account is a hack . (score: 0.0065)

🇮🇩 Indonesian test: 'Saya pikir akun ini adalah <mask>.'
  1. Saya pikir akun ini adalah palsu . (score: 0.4199)
  2. Saya pikir akun ini adalah penipuan . (score: 0.1155)
  3. Saya pikir akun ini adalah scam . (score: 0.0412)
  4. Saya pikir akun ini adalah ilegal . (score: 0.0322)
  5. Saya pikir akun ini adalah salah . (score: 0.0210)

🇨🇳 Chinese test: '我认为这个账户是一个<mask>。'
  1. I think this account is a scam . (score: 0.6098)
  2. I think this account is a fake . (score: 0.2386)
  3. I think this account is a fraud . (score: 0.0285)
  4. I think this account is a troll . (score: 0.0239)
  5. I think this account is a ha

In [15]:
# Method 2: Direct model loading (Lower-level API)
print("🔧 Loading XLM-RoBERTa with direct model access...")

try:
    # Load tokenizer and model directly
    xlm_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-large")
    xlm_model = AutoModelForMaskedLM.from_pretrained("FacebookAI/xlm-roberta-large")

    print("✅ XLM-RoBERTa tokenizer and model loaded successfully!")
    print(f"📊 Model config:")
    print(f"   - Vocab size: {xlm_tokenizer.vocab_size:,}")
    print(f"   - Max length: {xlm_tokenizer.model_max_length}")
    print(f"   - Model parameters: ~{sum(p.numel() for p in xlm_model.parameters()):,}")

    # Test direct usage
    def test_xlm_roberta_direct(text):
        print(f"\n🔍 Testing: '{text}'")

        # Tokenize
        inputs = xlm_tokenizer(text, return_tensors='pt')
        print(f"   📝 Tokenized: {xlm_tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])}")

        # Get embeddings/features
        with torch.no_grad():
            outputs = xlm_model(**inputs, output_hidden_states=True)
            # Get the hidden states (features)
            hidden_states = outputs.hidden_states[-1]  # Last layer hidden states

        print(f"   🧠 Hidden states shape: {hidden_states.shape}")
        print(f"   📊 Feature vector size per token: {hidden_states.shape[-1]}")

        return hidden_states

    # Test with different languages
    test_texts = [
        "Replace me by any text you'd like.",  # English
        "Ganti saya dengan teks apa pun yang Anda suka.",  # Indonesian
        "このテキストを好きなものに置き換えてください。"  # Japanese
    ]

    for text in test_texts:
        features = test_xlm_roberta_direct(text)

except Exception as e:
    print(f"❌ Error loading model directly: {e}")
    xlm_tokenizer = None
    xlm_model = None

🔧 Loading XLM-RoBERTa with direct model access...


Some weights of the model checkpoint at FacebookAI/xlm-roberta-large were not used when initializing XLMRobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


✅ XLM-RoBERTa tokenizer and model loaded successfully!
📊 Model config:
   - Vocab size: 250,002
   - Max length: 512
   - Model parameters: ~560,142,482

🔍 Testing: 'Replace me by any text you'd like.'
   📝 Tokenized: ['<s>', '▁Re', 'place', '▁me', '▁by', '▁any', '▁text', '▁you', "'", 'd', '▁like', '.', '</s>']
   🧠 Hidden states shape: torch.Size([1, 13, 1024])
   📊 Feature vector size per token: 1024

🔍 Testing: 'Ganti saya dengan teks apa pun yang Anda suka.'
   📝 Tokenized: ['<s>', '▁Gan', 'ti', '▁saya', '▁dengan', '▁teks', '▁apa', '▁pun', '▁yang', '▁Anda', '▁suka', '.', '</s>']
   🧠 Hidden states shape: torch.Size([1, 13, 1024])
   📊 Feature vector size per token: 1024

🔍 Testing: 'Ganti saya dengan teks apa pun yang Anda suka.'
   📝 Tokenized: ['<s>', '▁Gan', 'ti', '▁saya', '▁dengan', '▁teks', '▁apa', '▁pun', '▁yang', '▁Anda', '▁suka', '.', '</s>']
   🧠 Hidden states shape: torch.Size([1, 13, 1024])
   📊 Feature vector size per token: 1024

🔍 Testing: 'このテキストを好きなものに置き換えてください。'
  