# 🎨 Gradio Web Interface for Unified Hate Speech Classifier

This notebook provides a beautiful web interface for the unified classifier.

**Features:**
- Automatic language detection (Hindi/English)
- Real-time classification
- Probability distribution visualization
- Example texts for quick testing

In [None]:
# Install required libraries
!pip install langdetect transformers torch gradio --quiet

: 

In [None]:
import torch
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from langdetect import detect, DetectorFactory
from langdetect.lang_detect_exception import LangDetectException
import warnings

# Set seed for consistent language detection results across runs.
DetectorFactory.seed = 0
# Suppress every Python warning for the rest of the session so library
# warnings do not clutter the notebook output.
warnings.filterwarnings('ignore')

print(f"✅ Libraries imported! Gradio version: {gr.__version__}")

In [None]:
class LanguageDetector:
    """Language detector that identifies Hindi and English text.

    Detection is script-first: the share of Devanagari and ASCII letters among
    all alphabetic characters decides the obvious cases, and the ``langdetect``
    library is consulted for Latin-script or ambiguous text.

    Every result is a ``(code, name, confidence)`` tuple. ``code`` is ``'hi'``,
    ``'en'`` or ``'unsupported'``; ``name`` is the human-readable label (for
    unsupported text it may carry a detail in parentheses); ``confidence`` is
    the dominant script ratio, ``FALLBACK_CONFIDENCE`` for the ambiguous
    fallback, or ``0.0`` when unsupported.
    """

    DEVANAGARI_RANGE = (0x0900, 0x097F)
    # 'A'..'z'. The few punctuation code points inside this span are never
    # counted because only characters passing ``str.isalpha`` are inspected.
    ASCII_RANGE = (0x0041, 0x007A)

    # Both scripts above this share of the letters -> mixed (e.g. Hinglish).
    MIXED_SCRIPT_THRESHOLD = 0.1
    # One script above this share of the letters -> that script dominates.
    DOMINANT_SCRIPT_THRESHOLD = 0.7
    # Confidence reported when only langdetect (not the script) decided.
    FALLBACK_CONFIDENCE = 0.5

    def __init__(self):
        self.supported_languages = {'hi': 'hindi', 'en': 'english'}

    def _get_script_ratio(self, text):
        """Return ``(devanagari_ratio, latin_ratio)`` over alphabetic chars.

        Non-alphabetic characters (digits, punctuation, combining marks) are
        ignored. Returns ``(0, 0)`` when the text contains no letters.
        """
        devanagari_count = latin_count = total_chars = 0
        for char in text:
            if not char.isalpha():
                continue
            total_chars += 1
            code = ord(char)
            if self.DEVANAGARI_RANGE[0] <= code <= self.DEVANAGARI_RANGE[1]:
                devanagari_count += 1
            elif self.ASCII_RANGE[0] <= code <= self.ASCII_RANGE[1]:
                latin_count += 1
        if total_chars == 0:
            return 0, 0
        return devanagari_count / total_chars, latin_count / total_chars

    @staticmethod
    def _unsupported(detail=None):
        """Build the 'unsupported' result, optionally naming the reason."""
        if detail is None:
            return 'unsupported', 'not_supported', 0.0
        return 'unsupported', f'not_supported ({detail})', 0.0

    @staticmethod
    def _langdetect_code(text):
        """Return langdetect's language code, or ``None`` if detection fails."""
        try:
            # Module-level langdetect ``detect``, not this class's method.
            return detect(text)
        except LangDetectException:
            return None

    def detect(self, text):
        """Detect the language of ``text``.

        Args:
            text: Input to inspect. Non-string values are converted with
                ``str``; ``None`` is treated as empty input.

        Returns:
            A ``(code, name, confidence)`` tuple as described on the class.
        """
        text = '' if text is None else str(text).strip()
        if not text:
            return self._unsupported()

        devanagari_ratio, latin_ratio = self._get_script_ratio(text)

        if (devanagari_ratio > self.MIXED_SCRIPT_THRESHOLD
                and latin_ratio > self.MIXED_SCRIPT_THRESHOLD):
            return self._unsupported('hinglish/mixed')
        if devanagari_ratio > self.DOMINANT_SCRIPT_THRESHOLD:
            return 'hi', 'hindi', devanagari_ratio

        # Everything else needs a statistical guess from langdetect.
        detected_lang = self._langdetect_code(text)
        if detected_lang is None:
            return self._unsupported()

        if latin_ratio > self.DOMINANT_SCRIPT_THRESHOLD:
            # Latin script alone is not enough: only English is supported.
            if detected_lang == 'en':
                return 'en', 'english', latin_ratio
            return self._unsupported(detected_lang)

        if detected_lang in self.supported_languages:
            return (detected_lang, self.supported_languages[detected_lang],
                    self.FALLBACK_CONFIDENCE)
        return self._unsupported(detected_lang)

print("✅ LanguageDetector defined!")

✅ LanguageDetector defined!


In [None]:
class UnifiedHateSpeechClassifier:
    """Unified classifier with automatic language detection.

    Each input is passed to a ``LanguageDetector``; English text is scored by
    the English sequence-classification model, Hindi text by the Hindi one, and
    anything else is reported as not supported without running a model.
    """

    HUGGINGFACE_ENGLISH_MODEL = "Hate-speech-CNERG/bert-base-uncased-hatexplain"

    def __init__(self, english_model_path="./model", hindi_model_path="./hindi_text_classifier"):
        """Load both models onto the GPU when available, otherwise the CPU.

        Args:
            english_model_path: Directory checked for local English weights.
            hindi_model_path: Directory (or model id) of the Hindi classifier.
        """
        print("🚀 Initializing Unified Classifier...")
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"📱 Device: {self.device}")

        self.language_detector = LanguageDetector()
        self._load_english_model(english_model_path)
        self._load_hindi_model(hindi_model_path)
        print("✅ Classifier Ready!")

    def _load_english_model(self, path):
        """Load the English model from ``path``, else from the Hugging Face hub.

        The local directory is used only when it contains a weights file
        (``pytorch_model.bin`` or ``model.safetensors``).
        """
        print("📚 Loading English model...")
        local_weights = any(
            os.path.exists(os.path.join(path, weights_file))
            for weights_file in ("pytorch_model.bin", "model.safetensors")
        )

        if local_weights:
            source = path
        else:
            print("   📥 Downloading from HuggingFace...")
            source = self.HUGGINGFACE_ENGLISH_MODEL

        self.english_tokenizer = AutoTokenizer.from_pretrained(source)
        self.english_model = AutoModelForSequenceClassification.from_pretrained(source)

        # Label names come from the model config (index -> label mapping).
        self.english_labels = self.english_model.config.id2label
        self.english_model.to(self.device).eval()

    def _load_hindi_model(self, path):
        """Load the Hindi model from ``path`` with the base XLM-R tokenizer."""
        print("📚 Loading Hindi model...")
        # NOTE(review): the tokenizer is taken from the base checkpoint rather
        # than from `path` - presumably the fine-tuned directory ships no
        # tokenizer files; confirm.
        self.hindi_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
        self.hindi_model = AutoModelForSequenceClassification.from_pretrained(path)
        # Hard-coded label order; assumed to match the class indices used when
        # the model was trained - TODO confirm against the training setup.
        self.hindi_labels = ['hate', 'normal', 'offensive']
        self.hindi_model.to(self.device).eval()

    def _predict(self, text, tokenizer, model, labels):
        """Run one text through ``model`` and return the top prediction.

        Args:
            text: The string to classify.
            tokenizer: Callable producing a dict of tensors for ``model``.
            model: Callable whose result exposes ``.logits``.
            labels: Index -> label lookup (a list or an int-keyed dict).

        Returns:
            ``(label, confidence, probabilities)`` where ``probabilities`` maps
            every label to its softmax probability rounded to 4 decimals and
            ``confidence`` is the unrounded probability of ``label``.
        """
        inputs = tokenizer(text, truncation=True, padding=True, max_length=128, return_tensors='pt')
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
        # Single input, so take row 0 of the batch.
        probs = torch.softmax(outputs.logits, dim=-1)[0]
        pred_idx = torch.argmax(probs).item()
        prob_dict = {labels[i]: round(probs[i].item(), 4) for i in range(len(probs))}
        return labels[pred_idx], probs[pred_idx].item(), prob_dict

    def classify(self, text):
        """Detect the language of ``text`` and classify it.

        Args:
            text: Input to classify. Non-string values are converted with
                ``str``; ``None`` is treated as empty input.

        Returns:
            A dict with keys ``text``, ``detected_language``, ``prediction``,
            ``confidence``, ``probabilities`` and ``model_used``. Empty input
            yields ``prediction == 'error'``; an unsupported language yields
            ``prediction == 'NOT SUPPORTED'`` plus a ``message`` key.
        """
        text = '' if text is None else str(text).strip()
        if not text:
            return {'text': text, 'detected_language': 'empty', 'prediction': 'error',
                    'confidence': 0.0, 'probabilities': {}, 'model_used': None}

        lang_code, lang_name, _ = self.language_detector.detect(text)

        if lang_code == 'en':
            pred, conf, probs = self._predict(text, self.english_tokenizer, self.english_model, self.english_labels)
            model_used = 'English (BERT)'
        elif lang_code == 'hi':
            pred, conf, probs = self._predict(text, self.hindi_tokenizer, self.hindi_model, self.hindi_labels)
            model_used = 'Hindi (XLM-RoBERTa)'
        else:
            return {'text': text, 'detected_language': lang_name, 'prediction': 'NOT SUPPORTED',
                    'confidence': 0.0, 'probabilities': {}, 'model_used': None,
                    'message': f'Language "{lang_name}" is not supported.'}

        return {'text': text, 'detected_language': lang_name, 'prediction': pred,
                'confidence': round(conf, 4), 'probabilities': probs, 'model_used': model_used}

print("✅ UnifiedHateSpeechClassifier defined!")

✅ UnifiedHateSpeechClassifier defined!


In [None]:
# Build the shared classifier instance used by the Gradio callback below.
# Model locations are named up front so they are easy to find and change.
ENGLISH_MODEL_PATH = "./model"
HINDI_MODEL_PATH = "./hindi_text_classifier"

classifier = UnifiedHateSpeechClassifier(
    english_model_path=ENGLISH_MODEL_PATH,
    hindi_model_path=HINDI_MODEL_PATH,
)

🚀 Initializing Unified Classifier...
📱 Device: cpu
📚 Loading English model...
   📥 Downloading from HuggingFace...
📚 Loading Hindi model...
✅ Classifier Ready!


In [None]:
def classify_text(text):
    """Classify text and return formatted results for Gradio.

    Uses the module-level ``classifier`` instance.

    Args:
        text: Raw contents of the input textbox (may be empty or ``None``).

    Returns:
        A 4-tuple ``(language, prediction, model_info, probabilities)`` matching
        the four output components: three display strings and a
        label -> probability dict (empty when nothing was classified).
    """
    if not text or not text.strip():
        return "⚠️ Please enter text", "❓ Unknown", "N/A", {}

    result = classifier.classify(text)
    lang = result['detected_language'].upper()

    if result['prediction'] == 'NOT SUPPORTED':
        return f"🌐 {lang}", "❌ NOT SUPPORTED", result.get('message', 'Not supported'), {}

    pred = result['prediction'].upper()
    # Colour code per label; unknown labels fall back to a neutral marker.
    emoji = {'HATE': '🔴', 'NORMAL': '🟢', 'OFFENSIVE': '🟡', 'HATE SPEECH': '🔴'}.get(pred, '⚪')
    model_info = f"🤖 {result['model_used']} | 📈 {result['confidence']:.1%}"

    return f"🌐 {lang}", f"{emoji} {pred}", model_info, result['probabilities']

# Create Gradio Interface (compatible with Gradio 6.x)
# Layout: input column on the left, three read-only result boxes on the right,
# the probability label underneath, then clickable examples.
with gr.Blocks(title="Unified Hate Speech Classifier") as demo:

    gr.Markdown("""
    # 🛡️ Unified Hate Speech Classifier
    
    **Automatically detects language and classifies text as Hate Speech, Offensive, or Normal.**
    
    ✅ Supports: **English** | **Hindi (हिंदी)**  
    ❌ Not Supported: Hinglish, French, German, Japanese, etc.
    """)

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Enter Text to Classify",
                placeholder="Type in English or Hindi...\n\nExample: 'Hello world!' or 'नमस्ते दुनिया'",
                lines=4
            )
            classify_btn = gr.Button("🔍 Classify", variant="primary")

        with gr.Column(scale=1):
            lang_out = gr.Textbox(label="Language", interactive=False)
            pred_out = gr.Textbox(label="Prediction", interactive=False)
            model_out = gr.Textbox(label="Model & Confidence", interactive=False)

    prob_out = gr.Label(label="Probability Distribution", num_top_classes=3)

    gr.Markdown("### 📝 Examples:")
    # Examples cover each path: English, Hindi (Devanagari), mixed-script
    # Hinglish and an unsupported language.
    gr.Examples(
        examples=[
            ["I love spending time with my family"],
            ["You are such a stupid idiot"],
            ["All immigrants should be deported"],
            ["सबसे अच्छा प्रधानमंत्री नरेंद्र मोदी है"],
            ["ये कुत्ते की औलाद है सब"],
            ["आज मौसम बहुत सुहावना है"],
            ["Ye bahut bakwaas hai यार"],
            ["Je t'aime beaucoup"],
        ],
        inputs=text_input
    )

    # The button click and pressing Enter in the textbox run the same
    # callback and fill the same four outputs, in classify_text's tuple order.
    result_outputs = [lang_out, pred_out, model_out, prob_out]
    classify_btn.click(fn=classify_text, inputs=text_input, outputs=result_outputs)
    text_input.submit(fn=classify_text, inputs=text_input, outputs=result_outputs)

    gr.Markdown("---\n**Legend:** 🟢 Normal | 🟡 Offensive | 🔴 Hate Speech")

print("✅ Gradio interface ready! Run next cell to launch.")

✅ Gradio interface ready! Run next cell to launch.


In [None]:
# 🚀 Launch the Gradio interface.
# Gradio prints the local URL once the server is up; pass share=True to
# launch() to also create a public link.
demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




