In [2]:
# Install required libraries
!pip install transformers torch pandas numpy scikit-learn

# Import libraries
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pickle
import zipfile
import os



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Method 2: Direct loading without pickle (more reliable)
class SimpleHindiClassifier:
    """Hindi text classifier over a fixed label set (hate / normal / offensive).

    Loads a sequence-classification checkpoint from ``model_dir`` and a
    tokenizer by name, moves the model to CUDA when available (CPU otherwise),
    puts it in eval mode, and exposes single-text and batched prediction.

    NOTE(review): the tokenizer is loaded from ``tokenizer_name`` rather than
    from ``model_dir``; presumably the checkpoint directory does not contain
    tokenizer files. If it does, pass ``tokenizer_name=model_dir``.
    """

    # Default label order: position i names column i of the model's logits.
    # NOTE(review): assumed to match the label encoding used at training time
    # -- confirm against the training notebook.
    DEFAULT_LABELS = ('hate', 'normal', 'offensive')

    def __init__(self, model_dir="hindi_text_classifier",
                 tokenizer_name="xlm-roberta-base", labels=None, max_length=128):
        """Load tokenizer and model and prepare them for inference.

        Args:
            model_dir: Path or identifier passed to
                ``AutoModelForSequenceClassification.from_pretrained``.
            tokenizer_name: Name or path passed to
                ``AutoTokenizer.from_pretrained``.
            labels: Optional sequence of class names, one per logit column.
                Defaults to ``DEFAULT_LABELS``.
            max_length: Token limit applied (with truncation) to every input.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Load tokenizer (by name, not from the checkpoint directory)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

        # Load the trained model and switch to inference mode
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        self.model.to(self.device)
        self.model.eval()

        # Copy into a list so callers cannot mutate our label order from outside
        self.labels = list(self.DEFAULT_LABELS if labels is None else labels)
        self.max_length = max_length

        print("Model loaded successfully!")

    def _probabilities(self, batch):
        """Tokenize ``batch`` (one string or a list of strings) and return softmax scores.

        Returns:
            A 2-D tensor with one row per input text and one column per class.

        Raises:
            ValueError: if the number of model outputs differs from
                ``len(self.labels)``; mapping indices to names would then be
                wrong or incomplete.
        """
        inputs = self.tokenizer(
            batch,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=self.max_length
        )

        # Move every tensor produced by the tokenizer to the model's device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            logits = self.model(**inputs).logits
            probabilities = torch.nn.functional.softmax(logits, dim=1)

        if probabilities.shape[1] != len(self.labels):
            raise ValueError(
                f"Model produced {probabilities.shape[1]} class scores but "
                f"{len(self.labels)} labels are configured: {self.labels}"
            )
        return probabilities

    def _build_result(self, text, scores, winner, return_probabilities):
        """Assemble the result dict for one text from its per-class scores."""
        result = {
            'text': text,
            'prediction': self.labels[winner],
            'confidence': scores[winner]
        }
        if return_probabilities:
            result['probabilities'] = dict(zip(self.labels, scores))
        return result

    def predict(self, text, return_probabilities=False):
        """Classify Hindi text into hate, normal, or offensive.

        Args:
            text: The text to classify.
            return_probabilities: When true, add a ``'probabilities'`` entry
                mapping every label to its softmax score.

        Returns:
            dict with keys ``'text'``, ``'prediction'`` (label name),
            ``'confidence'`` (softmax score of the predicted label) and,
            optionally, ``'probabilities'``.

        Raises:
            ValueError: if ``text`` tokenizes to more than one sequence
                (use ``predict_batch`` for several texts), or on a
                label-count mismatch.
        """
        probabilities = self._probabilities(text)
        if probabilities.shape[0] != 1:
            raise ValueError(
                f"predict() expects a single text but received "
                f"{probabilities.shape[0]}; use predict_batch() instead"
            )
        winner = torch.argmax(probabilities, dim=1).item()
        return self._build_result(
            text, probabilities[0].tolist(), winner, return_probabilities
        )

    def predict_batch(self, texts, return_probabilities=False, batch_size=32):
        """Classify multiple texts.

        Texts are processed in padded mini-batches (one forward pass per
        ``batch_size`` texts) instead of one pass per text. Padding positions
        are marked in the attention mask the tokenizer returns, so scores are
        expected to match ``predict`` up to floating-point rounding.

        Args:
            texts: Iterable of texts. A bare ``str`` is treated as a single
                text rather than being iterated character by character.
            return_probabilities: Forwarded to each result (see ``predict``).
            batch_size: Maximum number of texts per forward pass.

        Returns:
            List of result dicts, in the same order as ``texts``.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1, or on a
                label-count mismatch.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        # Materialise so generators work and slicing is possible
        texts = [texts] if isinstance(texts, str) else list(texts)

        results = []
        for start in range(0, len(texts), batch_size):
            chunk = texts[start:start + batch_size]
            probabilities = self._probabilities(chunk)
            # One transfer per chunk instead of several .item() calls per text
            winners = torch.argmax(probabilities, dim=1).tolist()
            for text, winner, scores in zip(chunk, winners, probabilities.tolist()):
                results.append(
                    self._build_result(text, scores, winner, return_probabilities)
                )
        return results

# Build the classifier once; the constructor loads the tokenizer and model
classifier = SimpleHindiClassifier()

# Smoke test: header
banner = "=" * 60
print(f"\n{banner}")
print("HINDI TEXT CLASSIFICATION MODEL")
print(banner)

# Each entry pairs an input sentence with the label we expect for it
test_cases = [
    ("सबसे अच्छा प्रधानमंत्री नरेंद्र मोदी है", "normal"),
    ("ये कुत्ते की औलाद है सब", "hate"),
    ("तुम लोग बहुत ही नालायक हो", "offensive"),
    ("आज मौसम बहुत सुहावना है", "normal"),
    ("मुसलमान सभी आतंकवादी होते हैं", "hate")
]

print("\nTest Results:")
print("-" * 60)

for text, expected in test_cases:
    result = classifier.predict(text, return_probabilities=True)
    predicted = result['prediction']

    # Summary lines for this sentence, emitted in a single call
    print(
        f"\nText: {text}",
        f"Expected: {expected}",
        f"Predicted: {predicted}",
        f"Confidence: {result['confidence']:.1%}",
        sep="\n",
    )

    # Per-class breakdown, shown only when the classifier returned one
    if 'probabilities' in result:
        print("Probabilities:")
        for label, prob in result['probabilities'].items():
            print(f"  {label}: {prob:.1%}")

    # Compare the predicted label with the expected one
    print("✓ Correct!" if predicted == expected else "✗ Incorrect")

# Smoke test: footer
print(f"\n{banner}")
print("MODEL READY FOR USE")
print(banner)

Model loaded successfully!

HINDI TEXT CLASSIFICATION MODEL

Test Results:
------------------------------------------------------------

Text: सबसे अच्छा प्रधानमंत्री नरेंद्र मोदी है
Expected: normal
Predicted: normal
Confidence: 98.6%
Probabilities:
  hate: 0.9%
  normal: 98.6%
  offensive: 0.5%
✓ Correct!

Text: ये कुत्ते की औलाद है सब
Expected: hate
Predicted: hate
Confidence: 83.9%
Probabilities:
  hate: 83.9%
  normal: 0.3%
  offensive: 15.8%
✓ Correct!

Text: तुम लोग बहुत ही नालायक हो
Expected: offensive
Predicted: offensive
Confidence: 57.6%
Probabilities:
  hate: 37.5%
  normal: 4.9%
  offensive: 57.6%
✓ Correct!

Text: आज मौसम बहुत सुहावना है
Expected: normal
Predicted: normal
Confidence: 98.3%
Probabilities:
  hate: 1.2%
  normal: 98.3%
  offensive: 0.5%
✓ Correct!

Text: मुसलमान सभी आतंकवादी होते हैं
Expected: hate
Predicted: hate
Confidence: 50.6%
Probabilities:
  hate: 50.6%
  normal: 1.7%
  offensive: 47.7%
✓ Correct!

MODEL READY FOR USE
