# Model Inspect

In [1]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import torch
from tqdm import tqdm

In [2]:
from huggingface_hub import HfApi
import humanize


def inspect_model_details(model_names):
    """
    Inspect multiple models for a comprehensive comparison, including
    architecture, tokenizer, and Hub metadata.
    """
    print("🔍 COMPREHENSIVE MODEL INSPECTION")
    print("=" * 80)

    # Inisialisasi API untuk mengambil data dari Hugging Face Hub
    hf_api = HfApi()
    results = {}

    for model_name in model_names:
        print(f"\n📋 Inspecting: {model_name}")
        print("-" * 60)

        try:
            # 1. Inspeksi Konfigurasi
            config = AutoConfig.from_pretrained(model_name)
            info = {
                "model_type": config.model_type,
                # Detail Arsitektur
                "hidden_size": getattr(config, "hidden_size", "N/A"),
                "num_layers": getattr(config, "num_hidden_layers", "N/A"),
                "num_heads": getattr(config, "num_attention_heads", "N/A"),
                "num_parameters": (
                    humanize.intword(config.num_parameters())
                    if hasattr(config, "num_parameters")
                    and callable(config.num_parameters)
                    else "N/A"
                ),
                # Detail Klasifikasi
                "num_labels": config.num_labels,
                "labels": dict(config.id2label) if hasattr(config, "id2label") else {},
                "problem_type": getattr(config, "problem_type", "Not specified"),
            }

            print("   [Architecture]")
            print(f"   - Model Type: {info['model_type']}")
            print(f"   - Parameters: {info['num_parameters']}")
            print(
                f"   - Layers: {info['num_layers']}, Hidden Size: {info['hidden_size']}, Heads: {info['num_heads']}"
            )

            print("\n   [Classification Task]")
            print(f"   - Problem Type: {info['problem_type']}")
            print(f"   - Number of Labels: {info['num_labels']}")
            if info["labels"]:
                print(f"   - Categories: {list(info['labels'].values())}")

            # 2. Inspeksi Tokenizer
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                info["tokenizer_class"] = tokenizer.__class__.__name__
                info["vocab_size"] = humanize.intword(tokenizer.vocab_size)

                print("\n   [Tokenizer]")
                print(f"   - Class: {info['tokenizer_class']}")
                print(f"   - Vocabulary Size: {info['vocab_size']}")
            except Exception as tokenizer_error:
                print(f"\n   [Tokenizer]")
                print(f"   - ❌ Error loading tokenizer: {tokenizer_error}")
                info["tokenizer_error"] = str(tokenizer_error)

            # 3. Inspeksi Metadata dari Hugging Face Hub (with better error handling)
            try:
                model_info_hub = hf_api.model_info(model_name)

                # Safe access to attributes
                downloads = getattr(model_info_hub, "downloads", 0)
                likes = getattr(model_info_hub, "likes", 0)
                last_modified = getattr(model_info_hub, "lastModified", None)

                info["downloads"] = humanize.intword(downloads) if downloads else "N/A"
                info["likes"] = humanize.intword(likes) if likes else "N/A"

                # Safe date formatting
                if last_modified:
                    if hasattr(last_modified, "strftime"):
                        info["last_modified"] = last_modified.strftime("%Y-%m-%d")
                    else:
                        info["last_modified"] = str(last_modified).split("T")[0]
                else:
                    info["last_modified"] = "N/A"

                print("\n   [Hub Info]")
                print(f"   - Downloads: {info['downloads']}")
                print(f"   - Likes: {info['likes']}")
                print(f"   - Last Modified: {info['last_modified']}")

            except Exception as hub_error:
                print(f"\n   [Hub Info]")
                print(f"   - ❌ Error accessing Hub info: {hub_error}")
                info["hub_error"] = str(hub_error)
                info["downloads"] = "N/A"
                info["likes"] = "N/A"
                info["last_modified"] = "N/A"

            results[model_name] = info

        except Exception as e:
            print(f"   ❌ Error processing {model_name}: {e}")
            results[model_name] = {"error": str(e)}

    return results

In [3]:
models_to_compare = [
    "PaceKW/bert-multilabel-indonesian-hate-speech",
    "PaceKW/indobert-base-p1-multilabel-indonesian-hate-speech-new",
    "Aardiiiiy/indobertweet-base-Indonesian-sentiment-analysis",
    "Aardiiiiy/EmoSense-ID-Indonesian-Emotion-Classifier",
]

detailed_results = inspect_model_details(models_to_compare)

🔍 COMPREHENSIVE MODEL INSPECTION

📋 Inspecting: PaceKW/bert-multilabel-indonesian-hate-speech
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: multi_label_classification
   - Number of Labels: 12
   - Categories: ['HS', 'Abusive', 'HS_Individual', 'HS_Group', 'HS_Religion', 'HS_Race', 'HS_Physical', 'HS_Gender', 'HS_Other', 'HS_Weak', 'HS_Moderate', 'HS_Strong']

   [Tokenizer]
   - Class: BertTokenizerFast
   - Vocabulary Size: 31.9 thousand

   [Hub Info]
   - Downloads: 39
   - Likes: N/A
   - Last Modified: 2025-05-16

📋 Inspecting: PaceKW/indobert-base-p1-multilabel-indonesian-hate-speech-new
------------------------------------------------------------
   [Architecture]
   - Model Type: bert
   - Parameters: N/A
   - Layers: 12, Hidden Size: 768, Heads: 12

   [Classification Task]
   - Problem Type: multi_label_classifi

In [4]:
import pandas as pd

# Sentiment Analyze

In [5]:
class SentimentAnalyzer:
    def __init__(
        self, model_name="Aardiiiiy/indobertweet-base-Indonesian-sentiment-analysis"
    ):
        """Initialize sentiment analyzer with IndoBERTweet model"""
        print("🔄 Loading IndoBERTweet sentiment model...")

        # Using pipeline (simplest approach)
        self.pipe = pipeline(
            "text-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1,
        )

        # Load tokenizer and model separately for more control if needed
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Based on our inspection - we know exactly what labels exist
        self.sentiment_labels = ["NEGATIVE", "NEUTRAL", "POSITIVE"]

        print("✅ Sentiment model loaded successfully!")
        print(f"🔧 Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
        print(f"🏷️ Labels: {', '.join(self.sentiment_labels)}")

    def predict_single(self, text):
        """Predict sentiment for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {"label": "NEUTRAL", "score": 0.0, "confidence": "low"}

        try:
            result = self.pipe(str(text))
            prediction = result[0]

            # Add confidence level based on score
            if prediction["score"] >= 0.8:
                confidence = "high"
            elif prediction["score"] >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "label": prediction["label"],
                "score": prediction["score"],
                "confidence": confidence,
            }

        except Exception as e:
            print(f"Error predicting sentiment: {e}")
            return {"label": "NEUTRAL", "score": 0.0, "confidence": "error"}

    def predict_batch(self, texts, batch_size=32):
        """Predict sentiment for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for sentiment analysis...")
        print(f"📊 Batch size: {batch_size}")

        # Process in batches with progress bar
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing sentiment"):
            batch = texts[i : i + batch_size]

            # Clean batch texts
            clean_batch = []
            for text in batch:
                if pd.isna(text) or text is None or text == "" or text == "No Comment":
                    clean_batch.append("No Comment")
                else:
                    clean_batch.append(str(text))

            try:
                # Predict batch
                batch_results = self.pipe(clean_batch)

                # Process results
                for j, result in enumerate(batch_results):
                    if clean_batch[j] == "No Comment":
                        results.append(
                            {"label": "NEUTRAL", "score": 0.0, "confidence": "low"}
                        )
                    else:
                        # Add confidence level
                        if result["score"] >= 0.8:
                            confidence = "high"
                        elif result["score"] >= 0.6:
                            confidence = "medium"
                        else:
                            confidence = "low"

                        results.append(
                            {
                                "label": result["label"],
                                "score": result["score"],
                                "confidence": confidence,
                            }
                        )

            except Exception as e:
                print(f"Error in sentiment batch {i//batch_size + 1}: {e}")
                # Add neutral predictions for failed batch
                for _ in range(len(batch)):
                    results.append(
                        {"label": "NEUTRAL", "score": 0.0, "confidence": "error"}
                    )

        return results

    def analyze_results(self, results):
        """Analyze and display sentiment analysis results"""
        total = len(results)

        print(f"\n📊 SENTIMENT ANALYSIS SUMMARY")
        print("=" * 50)
        print(f"📝 Total texts: {total}")

        # Sentiment distribution
        sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
        for result in results:
            label = result["label"]
            sentiment_counts[label] = sentiment_counts.get(label, 0) + 1

        print(f"\n💭 Sentiment Distribution:")
        for sentiment, count in sentiment_counts.items():
            percentage = (count / total) * 100
            emoji = (
                "😊"
                if sentiment == "POSITIVE"
                else "😞" if sentiment == "NEGATIVE" else "😐"
            )
            print(f"   {emoji} {sentiment}: {count} ({percentage:.1f}%)")

        # Confidence distribution
        conf_counts = {"high": 0, "medium": 0, "low": 0, "error": 0}
        for result in results:
            conf_counts[result["confidence"]] += 1

        print(f"\n🎯 Confidence Distribution:")
        for conf, count in conf_counts.items():
            percentage = (count / total) * 100
            print(f"   {conf.capitalize()}: {count} ({percentage:.1f}%)")

        return {
            "total": total,
            "sentiment_distribution": sentiment_counts,
            "confidence_distribution": conf_counts,
        }

In [6]:
# Test SentimentAnalyzer only

print("🚀 Testing SentimentAnalyzer...")

# Initialize
try:
    sentiment_analyzer = SentimentAnalyzer()

    # Test with some examples
    test_texts = [
        "Gimana sih @layananinternet, sinyalnya ilang-ilangan mulu dari pagi di daerah Bekasi. Mana lagi butuh buat kerjaan #internetdown",
        "Sumpah, vibe-nya cozy abis buat nugas. Kopinya juga aje gile mantep. Fix bakal jadi langganan! ✨",
        "Overall experience-nya lumayan sih, cuman servicenya agak lama. Nunggunya ampe stengah jam sndiri.",
        "Desain HP-nya keren, kameranya juga oke. TAPI KENAPA BATERAINYA BOROS BANGET?! Baru setengah hari udah abis 😭",
        "Ada yg tau info konser terbaru bulan ini gaes?",
    ]

    print(f"\n🧪 Testing individual sentiment predictions:")
    print("-" * 60)

    for text in test_texts:
        result = sentiment_analyzer.predict_single(text)
        sentiment_emoji = (
            "😊"
            if result["label"] == "POSITIVE"
            else "😞" if result["label"] == "NEGATIVE" else "😐"
        )
        print(f"Text: '{text}'")
        print(
            f"   💭 Sentiment: {sentiment_emoji} {result['label']} ({result['score']:.3f}) - {result['confidence']}"
        )
        print()

    print("✅ SentimentAnalyzer test completed!")

except Exception as e:
    print(f"❌ Error in SentimentAnalyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing SentimentAnalyzer...
🔄 Loading IndoBERTweet sentiment model...


Device set to use cpu


✅ Sentiment model loaded successfully!
🔧 Using device: CPU
🏷️ Labels: NEGATIVE, NEUTRAL, POSITIVE

🧪 Testing individual sentiment predictions:
------------------------------------------------------------
Text: 'Gimana sih @layananinternet, sinyalnya ilang-ilangan mulu dari pagi di daerah Bekasi. Mana lagi butuh buat kerjaan #internetdown'
   💭 Sentiment: 😐 Negative (0.970) - high

Text: 'Sumpah, vibe-nya cozy abis buat nugas. Kopinya juga aje gile mantep. Fix bakal jadi langganan! ✨'
   💭 Sentiment: 😐 Positive (0.997) - high

Text: 'Overall experience-nya lumayan sih, cuman servicenya agak lama. Nunggunya ampe stengah jam sndiri.'
   💭 Sentiment: 😐 Neutral (0.995) - high

Text: 'Desain HP-nya keren, kameranya juga oke. TAPI KENAPA BATERAINYA BOROS BANGET?! Baru setengah hari udah abis 😭'
   💭 Sentiment: 😐 Neutral (0.841) - high

Text: 'Ada yg tau info konser terbaru bulan ini gaes?'
   💭 Sentiment: 😐 Neutral (0.998) - high

✅ SentimentAnalyzer test completed!


In [7]:
import re


def preprocess_text(text):
    """Simple preprocessing for social media text"""
    if not text:
        return text

    text = str(text).lower()
    text = re.sub(r"@\w+|http\S+", "", text)  # Remove mentions & URLs
    text = re.sub(r"#(\w+)", r"\1", text)  # Remove # but keep text
    text = re.sub(r"\s+", " ", text).strip()  # Clean spaces
    return text


# REALISTIC TWITTER-LIKE SENTIMENT TEST
print("🐦 REALISTIC TWITTER/SOCIAL MEDIA SENTIMENT TEST")
print("=" * 60)

try:
    sentiment_analyzer = SentimentAnalyzer()

    # Realistic social media texts with typos, slang, abbreviations
    twitter_texts = [
        # Positive tweets
        "mantepppp bgt sih ini!! rekomended bgt deh 👍👍",
        "gw suka bgt sama pelayanannya.. ramah2 orangnya",
        "worth it bgt!! fix bakal balik lg kesini",
        "OMG enak banget makanannyaa... puas dehh 😍",
        # Negative tweets
        "anjrit lama bgt nunggunyaa... udh kesel",
        "jelek bgt dah pelayanannya.. ga worth it",
        "overpriced bgt, rasa b aja... disappointed",
        "wth... udh bayar mahal service nya kyk gini doang??",
        # Neutral/Mixed
        "lumayan sih, tp masih bisa ditingkatin lg",
        "biasa aja... nothing special tbh",
        "okee lah.. standar gt",
        "ada plus minusnya.. overall oke2 aja",
        # Very short/unclear
        "meh",
        "gg",
        "nice",
        "wkwk lucu",
        "asek",
    ]

    print("\n🧪 Testing original vs preprocessed on social media texts:")
    print("-" * 60)

    for i, original in enumerate(twitter_texts, 1):
        preprocessed = preprocess_text(original)

        # Predict both
        orig_result = sentiment_analyzer.predict_single(original)
        prep_result = sentiment_analyzer.predict_single(preprocessed)

        print(f"\n{i}. '{original}'")
        if original != preprocessed:
            print(f"   Cleaned: '{preprocessed}'")

        # Results with emojis
        orig_emoji = (
            "😊"
            if orig_result["label"] == "POSITIVE"
            else "😞" if orig_result["label"] == "NEGATIVE" else "😐"
        )
        prep_emoji = (
            "😊"
            if prep_result["label"] == "POSITIVE"
            else "😞" if prep_result["label"] == "NEGATIVE" else "😐"
        )

        print(
            f"   Original: {orig_emoji} {orig_result['label']} ({orig_result['score']:.2f})"
        )
        if original != preprocessed:
            print(
                f"   Cleaned : {prep_emoji} {prep_result['label']} ({prep_result['score']:.2f})"
            )

            # Check difference
            if orig_result["label"] != prep_result["label"]:
                print("   🔄 DIFFERENT PREDICTION!")
            elif abs(orig_result["score"] - prep_result["score"]) > 0.1:
                print("   📊 SCORE CHANGED!")

    # Summary analysis
    print(f"\n📊 ANALYSIS SUMMARY:")
    print("=" * 40)

    # Count predictions by type
    pos_count = sum(
        1
        for text in twitter_texts
        if sentiment_analyzer.predict_single(text)["label"] == "POSITIVE"
    )
    neg_count = sum(
        1
        for text in twitter_texts
        if sentiment_analyzer.predict_single(text)["label"] == "NEGATIVE"
    )
    neu_count = sum(
        1
        for text in twitter_texts
        if sentiment_analyzer.predict_single(text)["label"] == "NEUTRAL"
    )

    print(
        f"😊 Positive: {pos_count}/{len(twitter_texts)} ({pos_count/len(twitter_texts)*100:.0f}%)"
    )
    print(
        f"😞 Negative: {neg_count}/{len(twitter_texts)} ({neg_count/len(twitter_texts)*100:.0f}%)"
    )
    print(
        f"😐 Neutral:  {neu_count}/{len(twitter_texts)} ({neu_count/len(twitter_texts)*100:.0f}%)"
    )

    print(f"\n💡 Key observations:")
    print("   • How well does the model handle Indonesian slang?")
    print("   • Does preprocessing help with typos and abbreviations?")
    print("   • Are very short texts classified correctly?")

except Exception as e:
    print(f"❌ Error: {e}")

print("\n✅ Social media sentiment test completed!")

🐦 REALISTIC TWITTER/SOCIAL MEDIA SENTIMENT TEST
🔄 Loading IndoBERTweet sentiment model...


Device set to use cpu


✅ Sentiment model loaded successfully!
🔧 Using device: CPU
🏷️ Labels: NEGATIVE, NEUTRAL, POSITIVE

🧪 Testing original vs preprocessed on social media texts:
------------------------------------------------------------

1. 'mantepppp bgt sih ini!! rekomended bgt deh 👍👍'
   Original: 😐 Positive (1.00)

2. 'gw suka bgt sama pelayanannya.. ramah2 orangnya'
   Original: 😐 Positive (1.00)

3. 'worth it bgt!! fix bakal balik lg kesini'
   Original: 😐 Positive (1.00)

4. 'OMG enak banget makanannyaa... puas dehh 😍'
   Cleaned: 'omg enak banget makanannyaa... puas dehh 😍'
   Original: 😐 Positive (1.00)
   Cleaned : 😐 Positive (1.00)

5. 'anjrit lama bgt nunggunyaa... udh kesel'
   Original: 😐 Negative (1.00)

6. 'jelek bgt dah pelayanannya.. ga worth it'
   Original: 😐 Negative (1.00)

7. 'overpriced bgt, rasa b aja... disappointed'
   Original: 😐 Negative (1.00)

8. 'wth... udh bayar mahal service nya kyk gini doang??'
   Original: 😐 Negative (0.97)

9. 'lumayan sih, tp masih bisa ditingkatin 

# Emotion Analyze

In [8]:
class EmotionAnalyzer:
    def __init__(
        self, model_name="Aardiiiiy/EmoSense-ID-Indonesian-Emotion-Classifier"
    ):
        """Initialize emotion analyzer with EmoSense model"""
        print("🔄 Loading EmoSense Indonesian emotion model...")

        # Using pipeline (simplest approach)
        self.pipe = pipeline(
            "text-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1,
        )

        # Load tokenizer and model separately for more control if needed
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Based on our inspection - we know exactly what labels exist (Plutchik's 8 emotions)
        self.emotion_labels = [
            "Anger",
            "Anticipation",
            "Disgust",
            "Fear",
            "Joy",
            "Sadness",
            "Surprise",
            "Trust",
        ]

        # Emotion emojis for better display
        self.emotion_emojis = {
            "Anger": "😡",
            "Anticipation": "🤔",
            "Disgust": "🤢",
            "Fear": "😨",
            "Joy": "😊",
            "Sadness": "😢",
            "Surprise": "😲",
            "Trust": "🤝",
        }

        print("✅ Emotion model loaded successfully!")
        print(f"🔧 Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
        print(f"🎭 Emotions: {', '.join(self.emotion_labels)}")

    def predict_single(self, text):
        """Predict emotion for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {"label": "Trust", "score": 0.0, "confidence": "low"}

        try:
            result = self.pipe(str(text))
            prediction = result[0]

            # Add confidence level based on score
            if prediction["score"] >= 0.8:
                confidence = "high"
            elif prediction["score"] >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "label": prediction["label"],
                "score": prediction["score"],
                "confidence": confidence,
            }

        except Exception as e:
            print(f"Error predicting emotion: {e}")
            return {"label": "Trust", "score": 0.0, "confidence": "error"}

    def predict_batch(self, texts, batch_size=32):
        """Predict emotion for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for emotion analysis...")
        print(f"📊 Batch size: {batch_size}")

        # Process in batches with progress bar
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing emotions"):
            batch = texts[i : i + batch_size]

            # Clean batch texts
            clean_batch = []
            for text in batch:
                if pd.isna(text) or text is None or text == "" or text == "No Comment":
                    clean_batch.append("No Comment")
                else:
                    clean_batch.append(str(text))

            try:
                # Predict batch
                batch_results = self.pipe(clean_batch)

                # Process results
                for j, result in enumerate(batch_results):
                    if clean_batch[j] == "No Comment":
                        results.append(
                            {"label": "Trust", "score": 0.0, "confidence": "low"}
                        )
                    else:
                        # Add confidence level
                        if result["score"] >= 0.8:
                            confidence = "high"
                        elif result["score"] >= 0.6:
                            confidence = "medium"
                        else:
                            confidence = "low"

                        results.append(
                            {
                                "label": result["label"],
                                "score": result["score"],
                                "confidence": confidence,
                            }
                        )

            except Exception as e:
                print(f"Error in emotion batch {i//batch_size + 1}: {e}")
                # Add default predictions for failed batch
                for _ in range(len(batch)):
                    results.append(
                        {"label": "Trust", "score": 0.0, "confidence": "error"}
                    )

        return results

    def analyze_results(self, results):
        """Analyze and display emotion analysis results"""
        total = len(results)

        print(f"\n📊 EMOTION ANALYSIS SUMMARY")
        print("=" * 50)
        print(f"📝 Total texts: {total}")

        # Emotion distribution
        emotion_counts = {}
        for emotion in self.emotion_labels:
            emotion_counts[emotion] = 0

        for result in results:
            label = result["label"]
            emotion_counts[label] = emotion_counts.get(label, 0) + 1

        print(f"\n🎭 Emotion Distribution:")
        # Sort by count, descending
        sorted_emotions = sorted(
            emotion_counts.items(), key=lambda x: x[1], reverse=True
        )
        for emotion, count in sorted_emotions:
            percentage = (count / total) * 100
            emoji = self.emotion_emojis.get(emotion, "🎭")
            print(f"   {emoji} {emotion}: {count} ({percentage:.1f}%)")

        # Confidence distribution
        conf_counts = {"high": 0, "medium": 0, "low": 0, "error": 0}
        for result in results:
            conf_counts[result["confidence"]] += 1

        print(f"\n🎯 Confidence Distribution:")
        for conf, count in conf_counts.items():
            percentage = (count / total) * 100
            print(f"   {conf.capitalize()}: {count} ({percentage:.1f}%)")

        return {
            "total": total,
            "emotion_distribution": emotion_counts,
            "confidence_distribution": conf_counts,
        }

In [9]:
# Test EmotionAnalyzer only

print("🚀 Testing EmotionAnalyzer...")

# Initialize
try:
    emotion_analyzer = EmotionAnalyzer()

    # Test with emotion-specific examples
    emotion_test_texts = [
        ("Saya sangat marah dengan pelayanan ini!", "Expected: Anger"),
        ("Wah senang sekali dapat hadiah ini!", "Expected: Joy"),
        ("Saya merasa sedih sekali hari ini", "Expected: Sadness"),
        ("Ngeri banget nonton film horror tadi", "Expected: Fear"),
        ("Kaget banget ternyata dia datang!", "Expected: Surprise"),
        ("Jijik banget lihat yang begitu", "Expected: Disgust"),
        ("Saya percaya sepenuhnya dengan tim ini", "Expected: Trust"),
        ("Tidak sabar menunggu acara besok!", "Expected: Anticipation"),
    ]

    print(f"\n🧪 Testing individual emotion predictions:")
    print("-" * 60)

    for text, expected in emotion_test_texts:
        result = emotion_analyzer.predict_single(text)
        emotion_emoji = emotion_analyzer.emotion_emojis.get(result["label"], "🎭")
        print(f"Text: '{text}'")
        print(f"   {expected}")
        print(
            f"   🎭 Emotion: {emotion_emoji} {result['label']} ({result['score']:.3f}) - {result['confidence']}"
        )
        print()

    print("✅ EmotionAnalyzer test completed!")

except Exception as e:
    print(f"❌ Error in EmotionAnalyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing EmotionAnalyzer...
🔄 Loading EmoSense Indonesian emotion model...


Device set to use cpu


✅ Emotion model loaded successfully!
🔧 Using device: CPU
🎭 Emotions: Anger, Anticipation, Disgust, Fear, Joy, Sadness, Surprise, Trust

🧪 Testing individual emotion predictions:
------------------------------------------------------------
Text: 'Saya sangat marah dengan pelayanan ini!'
   Expected: Anger
   🎭 Emotion: 😡 Anger (0.981) - high

Text: 'Wah senang sekali dapat hadiah ini!'
   Expected: Joy
   🎭 Emotion: 😊 Joy (0.992) - high

Text: 'Saya merasa sedih sekali hari ini'
   Expected: Sadness
   🎭 Emotion: 😢 Sadness (0.993) - high

Text: 'Ngeri banget nonton film horror tadi'
   Expected: Fear
   🎭 Emotion: 😨 Fear (0.979) - high

Text: 'Kaget banget ternyata dia datang!'
   Expected: Surprise
   🎭 Emotion: 😲 Surprise (0.994) - high

Text: 'Jijik banget lihat yang begitu'
   Expected: Disgust
   🎭 Emotion: 🤢 Disgust (0.990) - high

Text: 'Saya percaya sepenuhnya dengan tim ini'
   Expected: Trust
   🎭 Emotion: 🤝 Trust (0.993) - high

Text: 'Tidak sabar menunggu acara besok!'
   Ex

# Hate speech Analyze

In [10]:
class HateSpeechAnalyzer:

    def __init__(
        self,
        model_name="PaceKW/distilbert-base-multilingual-cased-multilabel-indonesian-hate-speech",
    ):
        """Initialize hate speech analyzer"""
        print("🔄 Loading Indonesian Hate Speech model...")

        # Using pipeline (simplest approach)
        self.pipe = pipeline(
            "text-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1,
            return_all_scores=True,  # Important for multilabel
        )

        # Load tokenizer and model separately for more control if needed
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Based on our inspection - we know exactly what labels exist
        self.hate_categories = [
            "HS",
            "Abusive",
            "HS_Individual",
            "HS_Group",
            "HS_Religion",
            "HS_Race",
            "HS_Physical",
            "HS_Gender",
            "HS_Other",
            "HS_Weak",
            "HS_Moderate",
            "HS_Strong",
        ]

        print("✅ Hate speech model loaded successfully!")
        print(f"🔧 Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")
        print(f"🏷️ Categories: {', '.join(self.hate_categories)}")

    def predict_single(self, text, threshold=0.5):
        """Predict hate speech for a single text"""
        if pd.isna(text) or text is None or text == "" or text == "No Comment":
            return {
                "is_hate_speech": False,
                "categories": [],
                "scores": {},
                "max_score": 0.0,
                "confidence": "low",
            }

        try:
            # Get predictions for all labels
            results = self.pipe(str(text))

            # Debug: Print hasil untuk lihat struktur
            print(f"Debug - Raw result type: {type(results)}")
            print(f"Debug - Raw result: {results}")

            # Handle different output formats
            if isinstance(results, list):
                # Jika hasil adalah list of lists (nested)
                if len(results) > 0 and isinstance(results[0], list):
                    predictions = results[0]  # Ambil list pertama
                else:
                    predictions = results  # Sudah format yang benar
            else:
                predictions = [results]  # Bungkus dalam list jika bukan list

            # Process multilabel results
            active_categories = []
            all_scores = {}
            max_score = 0.0

            for prediction in predictions:
                # Handle different key formats
                if isinstance(prediction, dict):
                    if "label" in prediction and "score" in prediction:
                        label = prediction["label"]
                        score = prediction["score"]
                    elif "LABEL" in prediction and "SCORE" in prediction:
                        label = prediction["LABEL"]
                        score = prediction["SCORE"]
                    else:
                        print(f"Debug - Unknown prediction format: {prediction}")
                        continue
                else:
                    print(f"Debug - Unexpected prediction type: {type(prediction)}")
                    continue

                all_scores[label] = score
                max_score = max(max_score, score)

                # Add to active categories if above threshold
                if score >= threshold:
                    active_categories.append(label)

            # Determine if hate speech detected
            is_hate_speech = len(active_categories) > 0

            # Confidence based on max score
            if max_score >= 0.8:
                confidence = "high"
            elif max_score >= 0.6:
                confidence = "medium"
            else:
                confidence = "low"

            return {
                "is_hate_speech": is_hate_speech,
                "categories": active_categories,
                "scores": all_scores,
                "max_score": max_score,
                "confidence": confidence,
            }

        except Exception as e:
            print(f"Error predicting hate speech: {e}")
            import traceback

            traceback.print_exc()
            return {
                "is_hate_speech": False,
                "categories": [],
                "scores": {},
                "max_score": 0.0,
                "confidence": "error",
            }

    # ... rest of the methods remain the same ...
    def predict_batch(self, texts, batch_size=16, threshold=0.5):
        """Predict hate speech for multiple texts efficiently"""
        results = []

        # Convert to list if pandas Series
        if hasattr(texts, "tolist"):
            texts = texts.tolist()

        print(f"🔄 Processing {len(texts)} texts for hate speech analysis...")
        print(f"📊 Threshold: {threshold} | Batch size: {batch_size}")

        # Process in batches
        for i in tqdm(range(0, len(texts), batch_size), desc="Analyzing hate speech"):
            batch = texts[i : i + batch_size]

            # Process each text in batch
            for text in batch:
                result = self.predict_single(text, threshold)
                results.append(result)

        return results

In [8]:
# Test HateSpeechAnalyzer only - FIXED VERSION

print("🚀 Testing HateSpeechAnalyzer...")

# Initialize
try:
    # Buat versi yang lebih simple untuk testing
    hate_analyzer = HateSpeechAnalyzer()

    # Test with hate speech examples
    hate_test_texts = [
        ("Selamat pagi semua!", "Expected: Clean"),
        ("Terima kasih atas bantuannya", "Expected: Clean"),
        ("Dasar bodoh tidak tahu apa-apa", "Expected: Abusive/HS_Individual"),
        ("Agama kalian sesat semua", "Expected: HS_Religion/HS_Group"),
        ("Perempuan memang inferior", "Expected: HS_Gender"),
        ("Orang ras itu memang jelek", "Expected: HS_Race"),
        ("Bunuh saja dia", "Expected: HS_Strong"),
        ("Agak aneh sih orangnya", "Expected: HS_Weak"),
    ]

    print(f"\n🧪 Testing individual hate speech predictions:")
    print("-" * 60)

    for text, expected in hate_test_texts:
        # Hapus debug print untuk testing yang clean
        result = hate_analyzer.predict_single(text, threshold=0.5)

        print(f"Text: '{text}'")
        print(f"   {expected}")
        hate_status = "🚨 YES" if result["is_hate_speech"] else "✅ NO"
        print(
            f"   🚨 Hate Speech: {hate_status} ({result['max_score']:.3f}) - {result['confidence']}"
        )
        if result["categories"]:
            print(f"   🏷️ Categories: {', '.join(result['categories'])}")
        print()

    print("✅ HateSpeechAnalyzer test completed!")

except Exception as e:
    print(f"❌ Error in HateSpeechAnalyzer: {e}")
    import traceback

    traceback.print_exc()

🚀 Testing HateSpeechAnalyzer...
🔄 Loading Indonesian Hate Speech model...


Device set to use cpu


✅ Hate speech model loaded successfully!
🔧 Using device: CPU
🏷️ Categories: HS, Abusive, HS_Individual, HS_Group, HS_Religion, HS_Race, HS_Physical, HS_Gender, HS_Other, HS_Weak, HS_Moderate, HS_Strong

🧪 Testing individual hate speech predictions:
------------------------------------------------------------
❌ Error in HateSpeechAnalyzer: name 'pd' is not defined


Traceback (most recent call last):
  File "/var/folders/2b/7ktqrd293f19ydr7g3yl7swr0000gn/T/ipykernel_88538/1200217081.py", line 27, in <module>
    result = hate_analyzer.predict_single(text, threshold=0.5)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/2b/7ktqrd293f19ydr7g3yl7swr0000gn/T/ipykernel_88538/2860086159.py", line 44, in predict_single
    if pd.isna(text) or text is None or text == "" or text == "No Comment":
       ^^
NameError: name 'pd' is not defined


In [12]:
hate_analyzer = HateSpeechAnalyzer()

hate_analyzer.predict_single("Dasar lu bego banget sih.")

🔄 Loading Indonesian Hate Speech model...


Device set to use cpu


✅ Hate speech model loaded successfully!
🔧 Using device: CPU
🏷️ Categories: HS, Abusive, HS_Individual, HS_Group, HS_Religion, HS_Race, HS_Physical, HS_Gender, HS_Other, HS_Weak, HS_Moderate, HS_Strong
Debug - Raw result type: <class 'list'>
Debug - Raw result: [[{'label': 'HS', 'score': 0.9977204203605652}, {'label': 'Abusive', 'score': 0.9990230798721313}, {'label': 'HS_Individual', 'score': 0.998616099357605}, {'label': 'HS_Group', 'score': 0.0007585312123410404}, {'label': 'HS_Religion', 'score': 0.00077065295772627}, {'label': 'HS_Race', 'score': 0.0006125601939857006}, {'label': 'HS_Physical', 'score': 0.0011905721621587873}, {'label': 'HS_Gender', 'score': 0.001185533357784152}, {'label': 'HS_Other', 'score': 0.9972537159919739}, {'label': 'HS_Weak', 'score': 0.9980564117431641}, {'label': 'HS_Moderate', 'score': 0.0010938361519947648}, {'label': 'HS_Strong', 'score': 0.0006570350378751755}]]


{'is_hate_speech': True,
 'categories': ['HS', 'Abusive', 'HS_Individual', 'HS_Other', 'HS_Weak'],
 'scores': {'HS': 0.9977204203605652,
  'Abusive': 0.9990230798721313,
  'HS_Individual': 0.998616099357605,
  'HS_Group': 0.0007585312123410404,
  'HS_Religion': 0.00077065295772627,
  'HS_Race': 0.0006125601939857006,
  'HS_Physical': 0.0011905721621587873,
  'HS_Gender': 0.001185533357784152,
  'HS_Other': 0.9972537159919739,
  'HS_Weak': 0.9980564117431641,
  'HS_Moderate': 0.0010938361519947648,
  'HS_Strong': 0.0006570350378751755},
 'max_score': 0.9990230798721313,
 'confidence': 'high'}

In [11]:
# Simple debugging version
from transformers import pipeline
import json

print("=== DEBUGGING HATE SPEECH MODEL ===\n")

# Load the model
print("1. Loading model...")
try:
    pipe = pipeline(
        "text-classification",
        model="PaceKW/distilbert-base-multilingual-cased-multilabel-indonesian-hate-speech",
    )
    print("✅ Model loaded successfully!\n")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    exit()

# Test text
test_text = "Laki-laki di sini mah cemen semua, bener2 gak layak."
print(f"2. Testing with: '{test_text}'\n")

# Method 1: Default prediction (no return_all_scores)
print("--- Method 1: Default prediction ---")
try:
    result1 = pipe(test_text)
    print(f"Type: {type(result1)}")
    print(f"Content: {result1}")
    print(f"JSON: {json.dumps(result1, indent=2, ensure_ascii=False)}\n")
except Exception as e:
    print(f"❌ Error: {e}\n")

# Method 2: With return_all_scores=True
print("--- Method 2: With return_all_scores=True ---")
try:
    result2 = pipe(test_text, return_all_scores=True)
    print(f"Type: {type(result2)}")
    print(f"Length: {len(result2) if hasattr(result2, '__len__') else 'N/A'}")
    print(f"Content: {result2}")
    print(f"JSON: {json.dumps(result2, indent=2, ensure_ascii=False)}\n")
except Exception as e:
    print(f"❌ Error: {e}\n")

# Method 3: Multiple texts
# print("--- Method 3: Multiple texts ---")
# texts = ["Kamu jelek", "Selamat pagi"]
# try:
#     result3 = pipe(texts)
#     print(f"Type: {type(result3)}")
#     print(f"Length: {len(result3) if hasattr(result3, '__len__') else 'N/A'}")
#     print(f"Content: {result3}")
#     print(f"JSON: {json.dumps(result3, indent=2, ensure_ascii=False)}\n")
# except Exception as e:
#     print(f"❌ Error: {e}\n")

# Method 4: Check model config
print("--- Method 4: Model info ---")
try:
    print(f"Model name: {pipe.model.name_or_path}")
    print(f"Task: {pipe.task}")
    if hasattr(pipe.model.config, "id2label"):
        print(f"Labels: {pipe.model.config.id2label}")
    if hasattr(pipe.model.config, "problem_type"):
        print(f"Problem type: {pipe.model.config.problem_type}")
except Exception as e:
    print(f"❌ Error getting model info: {e}")

print("\n=== DEBUG COMPLETE ===")
print("Run this first, then tell me what you see!")

=== DEBUGGING HATE SPEECH MODEL ===

1. Loading model...


Device set to use mps:0


✅ Model loaded successfully!

2. Testing with: 'Laki-laki di sini mah cemen semua, bener2 gak layak.'

--- Method 1: Default prediction ---
Type: <class 'list'>
Content: [{'label': 'Abusive', 'score': 0.01132898684591055}]
JSON: [
  {
    "label": "Abusive",
    "score": 0.01132898684591055
  }
]

--- Method 2: With return_all_scores=True ---
Type: <class 'list'>
Length: 1
Content: [[{'label': 'HS', 'score': 0.00534218642860651}, {'label': 'Abusive', 'score': 0.01132898684591055}, {'label': 'HS_Individual', 'score': 0.002088962122797966}, {'label': 'HS_Group', 'score': 0.003960433881729841}, {'label': 'HS_Religion', 'score': 0.0015794789651408792}, {'label': 'HS_Race', 'score': 0.0012448272900655866}, {'label': 'HS_Physical', 'score': 0.00023130213958211243}, {'label': 'HS_Gender', 'score': 0.0002631369570735842}, {'label': 'HS_Other', 'score': 0.003423444228246808}, {'label': 'HS_Weak', 'score': 0.0018494758987799287}, {'label': 'HS_Moderate', 'score': 0.0031918382737785578}, {'label'



In [14]:
from transformers import pipeline

# 1. Inisialisasi pipeline "fill-mask"
# Ini akan mengunduh model jika belum ada di cache
print("Mengunduh model (jika diperlukan)...")
tebak_kata = pipeline("fill-mask", model="cahya/bert-base-indonesian-1.5G")
print("Model siap digunakan.")

# 2. Siapkan beberapa kalimat tes
kalimat1 = "Ibu kota negara Indonesia adalah [MASK]."
kalimat2 = "Orang yang bekerja di rumah sakit biasanya adalah seorang [MASK]."
kalimat3 = "Setelah lelah bekerja seharian, enaknya minum [MASK] dingin."
kalimat4 = "Dia membeli mobil baru berwarna [MASK]."

# 3. Lakukan prediksi dan lihat hasilnya
print(f"\n--- Tes untuk: '{kalimat1}' ---")
hasil1 = tebak_kata(kalimat1)
for prediksi in hasil1:
    print(
        f"Kata: {prediksi['token_str']:<15} | Skor Keyakinan: {prediksi['score']:.4f}"
    )

print(f"\n--- Tes untuk: '{kalimat2}' ---")
hasil2 = tebak_kata(kalimat2, top_k=3)  # Minta 3 tebakan teratas
for prediksi in hasil2:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

print(f"\n--- Tes untuk: '{kalimat3}' ---")
hasil3 = tebak_kata(kalimat3, top_k=3)
for prediksi in hasil3:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

print(f"\n--- Tes untuk: '{kalimat4}' ---")
hasil4 = tebak_kata(kalimat4, top_k=3)
for prediksi in hasil4:
    print(f"Kalimat Lengkap: {prediksi['sequence']}")

Mengunduh model (jika diperlukan)...


Some weights of the model checkpoint at cahya/bert-base-indonesian-1.5G were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


Model siap digunakan.

--- Tes untuk: 'Ibu kota negara Indonesia adalah [MASK].' ---
Kata: jakarta         | Skor Keyakinan: 0.5408
Kata: yogyakarta      | Skor Keyakinan: 0.0404
Kata: pontianak       | Skor Keyakinan: 0.0294
Kata: makassar        | Skor Keyakinan: 0.0170
Kata: merauke         | Skor Keyakinan: 0.0158

--- Tes untuk: 'Orang yang bekerja di rumah sakit biasanya adalah seorang [MASK].' ---
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang dokter.
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang perawat.
Kalimat Lengkap: orang yang bekerja di rumah sakit biasanya adalah seorang bidan.

--- Tes untuk: 'Setelah lelah bekerja seharian, enaknya minum [MASK] dingin.' ---
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum air dingin.
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum minuman dingin.
Kalimat Lengkap: setelah lelah bekerja seharian, enaknya minum teh dingin.

--- Tes untuk: 'Dia membeli mo