In [39]:
#import shutil
#shutil.rmtree('/content/metrics', ignore_errors=True)

# XLM-RoBERTa

In [40]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_recall_fscore_support
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    XLMRobertaTokenizer,
    XLMRobertaForSequenceClassification
)
from datasets import Dataset
import tempfile
import subprocess
import sys

# IMPORT LIBRARIES AND SETUP

In [41]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_recall_fscore_support
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import Dataset
import tempfile
import subprocess
import sys

In [42]:
# Announce the dependency check, then run the installer helper.
# NOTE(review): install_requirements is not defined in any cell visible here —
# presumably it comes from an earlier cell. On a fresh kernel this call raises
# NameError unless that definition has been executed first. TODO confirm.
print("Checking and installing dependencies...")
install_requirements()

Checking and installing dependencies...
Installing scikit-learn...


In [43]:
# Optional dependency: Optimum's ONNX Runtime integration.
# A failed import is not fatal; the outcome is only recorded in ONNX_AVAILABLE.
# NOTE(review): no cell visible here reads ONNX_AVAILABLE or the two imported
# names — confirm whether this probe is still needed.
try:
    from optimum.onnxruntime import ORTModelForSequenceClassification
    from optimum.onnxruntime.configuration import OptimizationConfig
    ONNX_AVAILABLE = True
except ImportError:
    print("Optimum ONNX Runtime not available, ONNX export will be limited")
    ONNX_AVAILABLE = False

Optimum ONNX Runtime not available, ONNX export will be limited


In [44]:
# Unconditional status message: it is printed regardless of whether the optional
# Optimum import succeeded.
print("All dependencies loaded successfully!")

All dependencies loaded successfully!


# LOAD AND PREPARE DATASET

In [45]:
def load_dataset(csv_path='dataset.csv'):
    """Read, validate and normalise the labelled text dataset.

    Args:
        csv_path: CSV file that must provide ``text`` and ``label`` columns.

    Returns:
        pandas.DataFrame: Rows without missing text/label, with ``text`` cast to
        ``str`` and ``label`` cast to ``int``. Labels are remapped to ``0..n-1``
        (by sorted original value) when they are not already that range. If the
        file does not exist, the result of ``create_sample_dataset()`` is
        returned instead.

    Raises:
        ValueError: If a required column is absent from the CSV.
    """
    print(f"Loading multilingual dataset from {csv_path}...")

    try:
        raw = pd.read_csv(csv_path)
    except FileNotFoundError:
        # No dataset on disk: fall back to the bundled demonstration data.
        print(f"Error: Dataset file '{csv_path}' not found!")
        print("Creating a sample multilingual dataset for demonstration...")
        return create_sample_dataset()

    print(f"Dataset loaded successfully. Shape: {raw.shape}")

    # Both columns are mandatory for every later pipeline stage.
    absent = [name for name in ('text', 'label') if name not in raw.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    # Drop incomplete rows, then pin the column types.
    cleaned = raw.dropna(subset=['text', 'label'])
    cleaned = cleaned.assign(
        text=cleaned['text'].astype(str),
        label=cleaned['label'].astype(int),
    )

    distinct = sorted(cleaned['label'].unique())
    class_count = len(distinct)

    # The classifier head expects class ids 0..n-1; renumber anything else.
    if distinct != list(range(class_count)):
        print("Warning: Labels are not sequential starting from 0. Remapping...")
        remap = dict(zip(distinct, range(class_count)))
        cleaned = cleaned.assign(label=cleaned['label'].map(remap))
        print(f"Label mapping: {remap}")

    print(f"Dataset validation complete. Clean shape: {cleaned.shape}")
    print(f"Number of classes: {class_count}")
    print(f"Label distribution:\n{cleaned['label'].value_counts().sort_index()}")

    return cleaned

In [46]:

def create_sample_dataset():
    """Build the 75-row demonstration dataset and persist it as ``dataset.csv``.

    Five languages contribute 15 reviews each, ordered as five positive,
    five neutral and five negative texts (labels 2, 1 and 0 respectively).

    Returns:
        pandas.DataFrame: Columns ``text`` and ``label`` in the order listed
        below. The same frame is written to ``dataset.csv`` in the current
        working directory, without the index.
    """
    reviews_by_language = {
        'English': [
            'This product is absolutely amazing and works perfectly!',
            'Great quality and excellent customer service.',
            'I highly recommend this to everyone, outstanding performance.',
            'Love the design and functionality, very satisfied.',
            'Exceptional value for money, exceeded expectations.',

            'The product is okay, meets basic requirements.',
            'Average quality, nothing special but works fine.',
            'Standard features, typical for this price range.',
            'It works as described, no major complaints.',
            'Decent product, could be better but acceptable.',

            'Terrible quality, completely disappointed with purchase.',
            'Worst customer service experience ever encountered.',
            'Product broke after just one day of use.',
            'Would not recommend, waste of money and time.',
            'Poor build quality and unreliable performance.',
        ],
        'Spanish': [
            'Este producto es absolutamente increíble, funciona perfectamente.',
            'Excelente calidad y servicio al cliente excepcional.',
            'Lo recomiendo mucho, rendimiento extraordinario y confiable.',
            'Me encanta el diseño, muy satisfecho con la compra.',
            'Valor excepcional, superó todas mis expectativas completamente.',

            'El producto está bien, cumple con lo básico necesario.',
            'Calidad promedio, nada especial pero funciona correctamente.',
            'Características estándar, típico para este rango de precios.',
            'Funciona como se describe, sin quejas importantes.',
            'Producto decente, podría ser mejor pero aceptable.',

            'Calidad terrible, completamente decepcionado con la compra realizada.',
            'La peor experiencia de servicio al cliente jamás experimentada.',
            'El producto se rompió después de solo un día.',
            'No lo recomendaría, pérdida de dinero y tiempo.',
            'Mala calidad de construcción y rendimiento poco confiable.',
        ],
        'Tagalog': [
            'Napakaganda ng produktong ito, perpektong gumagana!',
            'Napakahusay ng kalidad at serbisyo sa customer.',
            'Highly recommended ko ito sa lahat, outstanding performance.',
            'Love ko ang design, very satisfied sa purchase.',
            'Exceptional value for money, sobra sa expectations.',

            'Okay lang ang produkto, nakakameet ng basic requirements.',
            'Average quality, walang special pero gumagana naman.',
            'Standard features, typical sa price range na ito.',
            'Gumagana naman as described, walang major complaints.',
            'Decent product, pwede pa mas better pero acceptable.',

            'Napakasama ng quality, disappointed ako sa purchase.',
            'Worst customer service experience na naranasan ko.',
            'Nasira ang produkto after one day lang.',
            'Hindi ko irerekumenda, sayang ang pera at oras.',
            'Pangit ng build quality at hindi reliable performance.',
        ],
        'French': [
            'Ce produit est absolument incroyable, fonctionne parfaitement bien!',
            'Excellente qualité et service client exceptionnel et professionnel.',
            'Je le recommande vivement, performances extraordinaires et fiables.',
            'J\'adore le design, très satisfait de cet achat.',
            'Valeur exceptionnelle, a dépassé toutes mes attentes complètement.',

            'Le produit est correct, répond aux exigences de base.',
            'Qualité moyenne, rien de spécial mais fonctionne bien.',
            'Fonctionnalités standard, typique pour cette gamme de prix.',
            'Fonctionne comme décrit, pas de plaintes majeures importantes.',
            'Produit décent, pourrait être mieux mais acceptable globalement.',

            'Qualité terrible, complètement déçu de cet achat récent.',
            'La pire expérience de service client jamais vécue.',
            'Le produit s\'est cassé après seulement une journée.',
            'Je ne le recommanderais pas, perte d\'argent.',
            'Mauvaise qualité de construction et performances peu fiables.',
        ],
        'German': [
            'Dieses Produkt ist absolut fantastisch und funktioniert perfekt!',
            'Ausgezeichnete Qualität und hervorragender Kundenservice immer.',
            'Ich empfehle es sehr, außergewöhnliche Leistung und Zuverlässigkeit.',
            'Ich liebe das Design, sehr zufrieden mit dem Kauf.',
            'Außergewöhnlicher Wert, hat alle Erwartungen vollständig übertroffen.',

            'Das Produkt ist okay, erfüllt die grundlegenden Anforderungen gut.',
            'Durchschnittliche Qualität, nichts Besonderes aber funktioniert einwandfrei.',
            'Standard-Features, typisch für diese Preisklasse und Kategorie.',
            'Funktioniert wie beschrieben, keine größeren Beschwerden vorhanden.',
            'Anständiges Produkt, könnte besser sein aber akzeptabel.',

            'Schreckliche Qualität, völlig enttäuscht von diesem Kauf heute.',
            'Die schlechteste Kundenservice-Erfahrung, die ich je gemacht habe.',
            'Das Produkt ging nach nur einem Tag kaputt.',
            'Würde ich nicht empfehlen, Geld- und Zeitverschwendung.',
            'Schlechte Bauqualität und unzuverlässige Leistung durchgehend.',
        ],
    }

    # Every language block follows the same sentiment layout:
    # 5 positive (2), 5 neutral (1), 5 negative (0).
    labels_per_language = [2] * 5 + [1] * 5 + [0] * 5

    texts = []
    labels = []
    for language_reviews in reviews_by_language.values():
        texts.extend(language_reviews)
        labels.extend(labels_per_language)

    df = pd.DataFrame({'text': texts, 'label': labels})
    df.to_csv('dataset.csv', index=False)

    print("Sample multilingual sentiment dataset created and saved as 'dataset.csv'")
    print("Dataset includes English, Spanish, Tagalog, French, and German samples")
    print("Labels: 0=Negative, 1=Neutral, 2=Positive")
    print(f"Total samples: {len(df)} across 5 languages")
    return df

In [47]:
def split_dataset(df, test_size=0.2, random_state=42):
    """Produce a stratified train/validation split of texts and labels.

    Args:
        df: Frame with ``text`` and ``label`` columns.
        test_size: Share of rows reserved for validation.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        tuple: ``(X_train, X_val, y_train, y_val)`` as plain Python lists.
    """
    print(f"Splitting dataset: {test_size*100}% for validation...")

    texts = df['text'].tolist()
    labels = df['label'].tolist()

    # Stratify on the label column so both splits keep the class proportions.
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        texts,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=df['label'],
    )

    for split_name, split_texts in (("Training", train_texts), ("Validation", val_texts)):
        print(f"{split_name} set: {len(split_texts)} samples")

    return train_texts, val_texts, train_labels, val_labels

# TOKENIZATION AND PREPROCESSING

In [48]:
def create_tokenized_datasets(X_train, X_val, y_train, y_val, model_name):
    """Load the tokenizer for ``model_name`` and tokenize both splits.

    Args:
        X_train: Training texts.
        X_val: Validation texts.
        y_train: Training labels, aligned with ``X_train``.
        y_val: Validation labels, aligned with ``X_val``.
        model_name: Checkpoint name or path passed to ``AutoTokenizer``.

    Returns:
        tuple: ``(train_tokenized, val_tokenized, tokenizer)``. The datasets keep
        their ``text`` and ``labels`` columns next to the tokenizer outputs.
    """
    print("Loading XLM-RoBERTa tokenizer and creating tokenized datasets...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Fall back to the EOS token when the checkpoint defines no padding token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print(f"Tokenizer vocabulary size: {tokenizer.vocab_size}")
    print(f"Model max length: {tokenizer.model_max_length}")

    def _encode_batch(batch):
        # Truncate to 256 tokens; padding is deferred to the data collator.
        return tokenizer(
            batch['text'],
            truncation=True,
            padding=False,
            max_length=256,
        )

    raw_train = Dataset.from_dict({'text': X_train, 'labels': y_train})
    raw_val = Dataset.from_dict({'text': X_val, 'labels': y_val})

    print("Tokenizing training dataset...")
    train_tokenized = raw_train.map(_encode_batch, batched=True)

    print("Tokenizing validation dataset...")
    val_tokenized = raw_val.map(_encode_batch, batched=True)

    print("Tokenization complete!")
    first_example_ids = train_tokenized[0]['input_ids']
    print(f"Sample tokenized text length: {len(first_example_ids)}")

    return train_tokenized, val_tokenized, tokenizer

# MODEL TRAINING

In [49]:
def train_model(train_dataset, val_dataset, tokenizer, model_name, output_dir, num_labels):
    """Fine-tune a sequence-classification model and save it to ``output_dir``.

    Args:
        train_dataset: Tokenized training split handed to ``Trainer``.
        val_dataset: Tokenized validation split used for periodic evaluation.
        tokenizer: Tokenizer used for dynamic padding; saved next to the model.
        model_name: Checkpoint name or path passed to ``from_pretrained``.
        output_dir: Directory for checkpoints, logs and the final model.
        num_labels: Size of the classification head.

    Returns:
        tuple: ``(trainer, model)`` after training has finished.
    """
    import inspect

    print("Initializing XLM-RoBERTa model for multilingual classification training...")

    # Load model with appropriate number of labels
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        problem_type="single_label_classification"
    )

    print(f"Model loaded with {model.num_parameters():,} parameters")
    print(f"Model architecture: {model.config.hidden_size} hidden, {model.config.num_hidden_layers} layers")

    # Keep the model's padding id in sync with the tokenizer.
    if tokenizer.pad_token_id is not None:
        model.config.pad_token_id = tokenizer.pad_token_id

    # Pads each batch to its longest sequence at collation time.
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Mixed precision needs a CUDA device; requesting fp16 on a CPU-only
    # machine makes TrainingArguments fail, so enable it only when possible.
    use_fp16 = torch.cuda.is_available()

    training_args_dict = {
        'output_dir': output_dir,
        'num_train_epochs': 3,  # Standard for fine-tuning large models
        'per_device_train_batch_size': 8,  # Conservative for 270M parameter model
        'per_device_eval_batch_size': 8,
        'learning_rate': 2e-5,  # Lower learning rate for large pre-trained model
        'weight_decay': 0.01,
        'warmup_ratio': 0.06,  # 6% warmup recommended for RoBERTa
        'logging_dir': f'{output_dir}/logs',
        'logging_steps': 25,
        'save_total_limit': 2,
        'load_best_model_at_end': True,
        'metric_for_best_model': "eval_f1_weighted",
        'greater_is_better': True,
        'report_to': [],
        'seed': 42,
        'dataloader_num_workers': 0,
        'remove_unused_columns': True,
        'fp16': use_fp16,  # Mixed precision for efficiency (GPU only)
        'dataloader_pin_memory': False,
        'gradient_checkpointing': True,  # Save memory for large model
        'max_grad_norm': 1.0,  # Gradient clipping
        'lr_scheduler_type': 'cosine',  # Cosine learning rate schedule
    }

    # Newer transformers releases name the option `eval_strategy`, older ones
    # `evaluation_strategy`; everything else is identical between the two.
    if hasattr(TrainingArguments, 'eval_strategy'):
        eval_strategy_key = 'eval_strategy'
    else:
        eval_strategy_key = 'evaluation_strategy'
    training_args_dict.update({
        eval_strategy_key: "steps",
        'eval_steps': 100,
        'save_strategy': "steps",
        'save_steps': 100,
    })

    training_args = TrainingArguments(**training_args_dict)

    def compute_metrics(eval_pred):
        """Turn raw logits and labels into accuracy/F1/precision/recall."""
        predictions, labels = eval_pred

        # Some models return a tuple whose first element holds the logits.
        if isinstance(predictions, tuple):
            predictions = predictions[0]

        # Convert to numpy array if it's a tensor
        if hasattr(predictions, 'numpy'):
            predictions = predictions.numpy()

        predictions = np.argmax(predictions, axis=1)

        # One call yields both macro precision and macro recall.
        precision_macro, recall_macro, _, _ = precision_recall_fscore_support(
            labels, predictions, average='macro'
        )

        return {
            'accuracy': accuracy_score(labels, predictions),
            'f1_macro': f1_score(labels, predictions, average='macro'),
            'f1_weighted': f1_score(labels, predictions, average='weighted'),
            'precision_macro': precision_macro,
            'recall_macro': recall_macro,
        }

    trainer_kwargs = {
        'model': model,
        'args': training_args,
        'train_dataset': train_dataset,
        'eval_dataset': val_dataset,
        'data_collator': data_collator,
        'compute_metrics': compute_metrics,
    }
    # `tokenizer=` is deprecated in favour of `processing_class=` in recent
    # transformers releases; pick whichever keyword this version accepts.
    trainer_params = inspect.signature(Trainer.__init__).parameters
    if 'processing_class' in trainer_params:
        trainer_kwargs['processing_class'] = tokenizer
    else:
        trainer_kwargs['tokenizer'] = tokenizer

    trainer = Trainer(**trainer_kwargs)

    print("Starting training...")
    trainer.train()

    # Persist the final model together with its tokenizer files.
    print(f"Saving model to {output_dir}...")
    trainer.save_model()
    tokenizer.save_pretrained(output_dir)

    return trainer, model

# MODEL EVALUATION

In [50]:
def evaluate_model(trainer, X_val, y_val, output_dir, num_labels):
    """Evaluate the trained model and write a text report to ``metrics/``.

    For three-class models the classes 1 and 2 are merged into a single "Safe"
    class before the report and metrics are computed; class 0 is "NSFW".

    Args:
        trainer: Trained ``Trainer`` whose ``eval_dataset`` is scored.
        X_val: Validation texts (not used by this function).
        y_val: True labels, in the same order as ``trainer.eval_dataset``.
        output_dir: Model directory (not used by this function).
        num_labels: Number of classes the model was trained with.

    Returns:
        tuple: ``(accuracy, report)`` — the accuracy as a float and the
        ``classification_report`` text.
    """
    print("Evaluating XLM-RoBERTa model performance...")

    # Aggregate metrics from the Trainer's own evaluation loop.
    eval_results = trainer.evaluate()

    # Raw logits for the detailed classification report.
    predictions = trainer.predict(trainer.eval_dataset)

    if hasattr(predictions, 'predictions'):
        preds = predictions.predictions
    else:
        preds = predictions[0]

    # Convert to numpy array if it's a tensor
    if hasattr(preds, 'numpy'):
        preds = preds.numpy()

    y_pred = np.argmax(preds, axis=1)

    # Decide which label space the report is computed in. Every branch must
    # define the same three names; previously the two-class branch only set
    # target_names, so the metric variables below were unbound.
    if num_labels == 3:
        # Map: 0=NSFW, 1=Safe, 2=Safe (combining neutral and positive as Safe)
        target_names = ['NSFW', 'Safe']
        report_true = [0 if label == 0 else 1 for label in y_val]
        report_pred = [0 if pred == 0 else 1 for pred in y_pred]
    elif num_labels == 2:
        target_names = ['NSFW', 'Safe']
        report_true = y_val
        report_pred = y_pred
    else:
        target_names = [f'Class_{i}' for i in range(num_labels)]
        report_true = y_val
        report_pred = y_pred

    # Passing `labels` explicitly keeps the report aligned with target_names
    # even when a class happens to be missing from a small validation split.
    report = classification_report(
        report_true,
        report_pred,
        labels=list(range(len(target_names))),
        target_names=target_names,
        digits=4
    )

    accuracy = accuracy_score(report_true, report_pred)
    f1_macro = f1_score(report_true, report_pred, average='macro')
    f1_weighted = f1_score(report_true, report_pred, average='weighted')
    precision_macro, recall_macro, _, _ = precision_recall_fscore_support(
        report_true, report_pred, average='macro'
    )

    # Prepare metrics text
    task_description = "Multilingual Content Safety Classification (NSFW vs Safe)" if num_labels <= 3 else f"Multilingual Text Classification ({num_labels} classes)"

    metrics_text = f"""XLM-RoBERTa Content Safety Classification Model Evaluation Results
{'='*75}

Model: FacebookAI/xlm-roberta-base
Task: {task_description}
Architecture: 12 layers, 768 hidden units, 270M parameters
Languages: Supports 100+ languages including major world languages

Performance Metrics:
{'-'*40}
Accuracy: {accuracy:.4f}
F1-Score (Macro): {f1_macro:.4f}
F1-Score (Weighted): {f1_weighted:.4f}
Precision (Macro): {precision_macro:.4f}
Recall (Macro): {recall_macro:.4f}

Classification Report:
{report}

Training Results:
{'-'*40}
"""

    # Append the numeric entries reported by trainer.evaluate().
    for key, value in eval_results.items():
        if isinstance(value, (int, float)):
            metrics_text += f"{key}: {value:.4f}\n"

    # Save metrics
    os.makedirs('metrics', exist_ok=True)
    metrics_path = 'metrics/xlm_roberta_metrics.txt'

    with open(metrics_path, 'w', encoding='utf-8') as f:
        f.write(metrics_text)

    print(f"Metrics saved to {metrics_path}")
    print(f"Validation Accuracy: {accuracy:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    print(f"F1-Score (Weighted): {f1_weighted:.4f}")

    return accuracy, report

# ONNX EXPORT

In [51]:
def export_to_onnx(model_dir, onnx_path):
    """Export the fine-tuned model in ``model_dir`` to an ONNX file.

    Args:
        model_dir: Directory containing the saved sequence-classification model.
        onnx_path: Destination file; a bare filename (no directory part) is
            written to the current working directory.

    Returns:
        bool: ``True`` on success, ``False`` if any step raised (the error and
        traceback are printed, not re-raised).
    """
    print("Exporting XLM-RoBERTa model to ONNX format...")

    try:
        # Load the trained model
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        model.eval()
        # Fix: Load tokenizer from original model name instead of saved directory
        tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")

        # Create dummy input with multilingual sample
        dummy_input = tokenizer(
            "This is a multilingual text sample for ONNX export testing",
            return_tensors="pt",
            max_length=256,
            padding="max_length",
            truncation=True
        )

        # os.path.dirname() is '' for a bare filename and os.makedirs('')
        # raises, so only create the directory when there is one.
        onnx_dir = os.path.dirname(onnx_path)
        if onnx_dir:
            os.makedirs(onnx_dir, exist_ok=True)

        # Select the tensors by name so their order always matches input_names,
        # independent of which extra keys the tokenizer returns.
        export_inputs = (dummy_input['input_ids'], dummy_input['attention_mask'])

        torch.onnx.export(
            model,
            export_inputs,
            onnx_path,
            export_params=True,
            opset_version=14,
            do_constant_folding=True,
            input_names=['input_ids', 'attention_mask'],
            output_names=['logits'],
            dynamic_axes={
                'input_ids': {0: 'batch_size', 1: 'sequence'},
                'attention_mask': {0: 'batch_size', 1: 'sequence'},
                'logits': {0: 'batch_size'}
            }
        )

        print(f"ONNX model exported to: {onnx_path}")

        # Report the exported file size in MiB.
        model_size = os.path.getsize(onnx_path) / (1024 * 1024)
        print(f"ONNX model size: {model_size:.2f} MB")

        return True

    except Exception as e:
        # Export is best-effort: report the failure and let the pipeline go on.
        print(f"ONNX export failed: {e}")
        import traceback
        traceback.print_exc()
        return False

# TENSORFLOW LITE EXPORT

In [52]:
def export_to_tflite_from_pt(model_dir, tflite_path):
    """Convert the saved PyTorch model to TensorFlow Lite.

    The model is loaded into TensorFlow from the PyTorch weights, written as a
    SavedModel under ``<model_dir>/tf_saved_model`` and then converted with
    default optimizations and float16 as supported type.

    Args:
        model_dir: Directory containing the saved PyTorch model.
        tflite_path: Destination file; a bare filename (no directory part) is
            written to the current working directory.

    Returns:
        bool: ``True`` on success, ``False`` if any step raised (including a
        missing TensorFlow installation); the error and traceback are printed.
    """
    try:
        import tensorflow as tf
        from transformers import TFAutoModelForSequenceClassification

        print("Converting XLM-RoBERTa PyTorch model to TensorFlow...")

        # Load and convert to TensorFlow
        tf_model = TFAutoModelForSequenceClassification.from_pretrained(
            model_dir,
            from_pt=True
        )

        # Save as TensorFlow SavedModel
        tf_saved_model_dir = os.path.join(model_dir, "tf_saved_model")
        tf.saved_model.save(tf_model, tf_saved_model_dir)
        print(f"Saved intermediate TensorFlow model to {tf_saved_model_dir}")

        # Convert to TFLite with optimizations
        converter = tf.lite.TFLiteConverter.from_saved_model(tf_saved_model_dir)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        # Additional optimizations for mobile deployment
        converter.target_spec.supported_types = [tf.float16]

        tflite_model = converter.convert()

        # os.path.dirname() is '' for a bare filename and os.makedirs('')
        # raises, so only create the directory when there is one.
        tflite_dir = os.path.dirname(tflite_path)
        if tflite_dir:
            os.makedirs(tflite_dir, exist_ok=True)
        with open(tflite_path, "wb") as f:
            f.write(tflite_model)

        # Report the exported file size in MiB.
        model_size = os.path.getsize(tflite_path) / (1024 * 1024)
        print(f"TFLite model successfully exported to: {tflite_path}")
        print(f"TFLite model size: {model_size:.2f} MB")

        return True

    except Exception as e:
        # Export is best-effort: report the failure and let the pipeline go on.
        print(f"TensorFlow Lite export failed: {e}")
        import traceback
        traceback.print_exc()
        return False

# MAIN

In [53]:
def main():
    """Run the whole pipeline: load, split, tokenize, train, evaluate, export.

    Prints progress along the way and a final summary with the export status,
    the checkpoint directory, the metrics file and the validation accuracy.
    """
    print("Starting XLM-RoBERTa Multilingual Text Classification Pipeline")
    print("="*75)

    # Fixed configuration for this run.
    model_name = "FacebookAI/xlm-roberta-base"
    output_dir = "models/xlm_roberta_classification"
    onnx_path = "models/xlm_roberta_classification_model.onnx"
    tflite_path = "models/xlm_roberta_classification_model.tflite"

    for directory in ("models", "metrics"):
        os.makedirs(directory, exist_ok=True)

    print(f"Using model: {model_name}")
    print(f"Output directory: {output_dir}")

    # 1. Data: load (or synthesise) the dataset and count its classes.
    df = load_dataset()
    num_labels = len(df['label'].unique())

    # 2. Stratified train/validation split.
    X_train, X_val, y_train, y_val = split_dataset(df)

    # 3. Tokenization.
    train_dataset, val_dataset, tokenizer = create_tokenized_datasets(
        X_train, X_val, y_train, y_val, model_name
    )

    # 4. Fine-tuning.
    trainer, model = train_model(
        train_dataset, val_dataset, tokenizer, model_name, output_dir, num_labels
    )

    # 5. Evaluation.
    accuracy, report = evaluate_model(trainer, X_val, y_val, output_dir, num_labels)

    # 6./7. Exports: ONNX first, then TFLite.
    export_outcomes = [
        ("ONNX", onnx_path, export_to_onnx(output_dir, onnx_path)),
        ("TFLite", tflite_path, export_to_tflite_from_pt(output_dir, tflite_path)),
    ]

    # Summary.
    print("\n" + "="*75)
    print("XLM-RoBERTa Multilingual Classification Training Complete!")

    for format_name, exported_path, succeeded in export_outcomes:
        if succeeded:
            print(f"✅ {format_name} model: {exported_path}")
        else:
            print(f"❌ {format_name} export: FAILED")

    print(f"\nModel checkpoints: {output_dir}")
    print("Metrics: metrics/xlm_roberta_metrics.txt")
    print(f"Final validation accuracy: {accuracy:.4f}")

# INFERENCE

In [54]:
def test_inference(model_dir, test_texts=None):
    """Run the saved model on sample texts and print the predictions.

    Args:
        model_dir: Directory containing the saved sequence-classification model.
        test_texts: Optional list of texts to classify. When omitted, a built-in
            set of 21 texts (3 per language, 7 languages) is used.

    For three-class models, classes 1 and 2 are reported together as "Safe" and
    class 0 as "NSFW". Errors are caught and printed together with a traceback.
    """
    # Language tags only describe the built-in samples (three per language);
    # caller-supplied texts have no known language.
    language_names = ["English", "Spanish", "Tagalog", "French", "German", "Italian", "Portuguese"]
    using_default_texts = test_texts is None

    if using_default_texts:
        test_texts = [
            # English samples
            "I absolutely love this amazing product, it's fantastic!",
            "The product is okay, nothing particularly special about it.",
            "Terrible quality, I'm completely disappointed with this purchase.",

            # Spanish samples
            "Este producto es absolutamente increíble, me encanta mucho.",
            "El producto está bien, nada especialmente notable o destacable.",
            "Calidad horrible, estoy completamente decepcionado con la compra.",

            # Tagalog samples
            "Napakaganda ng produktong ito, sobrang satisfied ako dito!",
            "Okay lang naman ang produkto, walang special na features.",
            "Sobrang pangit ng quality, disappointed ako sa purchase na ito.",

            # French samples
            "J'adore absolument ce produit, il est vraiment fantastique!",
            "Le produit est correct, rien de particulièrement remarquable vraiment.",
            "Qualité horrible, je suis complètement déçu de cet achat récent.",

            # German samples
            "Ich liebe dieses Produkt absolut, es ist wirklich fantastisch!",
            "Das Produkt ist okay, nichts besonders Bemerkenswertes daran wirklich.",
            "Schreckliche Qualität, ich bin völlig enttäuscht von diesem Kauf.",

            # Italian samples
            "Amo assolutamente questo prodotto, è davvero fantastico e perfetto!",
            "Il prodotto va bene, niente di particolarmente notevole o speciale.",
            "Qualità orribile, sono completamente deluso da questo acquisto recente.",

            # Portuguese samples
            "Eu amo absolutamente este produto, é realmente fantástico e perfeito!",
            "O produto está bem, nada particularmente notável ou especial mesmo.",
            "Qualidade horrível, estou completamente decepcionado com esta compra."
        ]

    print("\nTesting trained XLM-RoBERTa multilingual model...")

    try:
        # Load model and tokenizer
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        # Fix: Load tokenizer from original model name instead of saved directory
        tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")

        model.eval()

        # Determine label names based on number of labels
        num_labels = model.config.num_labels
        if num_labels == 2:
            label_names = ["NSFW", "Safe"]
        elif num_labels == 3:
            label_names = ["NSFW", "Safe", "Safe"]  # 0=NSFW, 1=Safe, 2=Safe
        else:
            label_names = [f"Class_{i}" for i in range(num_labels)]

        for i, text in enumerate(test_texts):
            # Only the built-in samples follow the 3-per-language layout.
            lang_idx = i // 3
            if using_default_texts and lang_idx < len(language_names):
                language = language_names[lang_idx]
            else:
                language = "Unknown"

            # Tokenize
            inputs = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=256,
                padding=True
            )

            # Predict
            with torch.no_grad():
                outputs = model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predicted_class = torch.argmax(predictions, dim=-1).item()
                confidence = predictions[0][predicted_class].item()

            if num_labels == 3:
                # Collapse the three classes to NSFW vs Safe for display;
                # "Safe" carries the combined probability of classes 1 and 2.
                nsfw_prob = predictions[0][0].item()
                safe_prob = (predictions[0][1] + predictions[0][2]).item()
                if predicted_class == 0:
                    display_label = "NSFW"
                    display_confidence = confidence
                else:
                    display_label = "Safe"
                    display_confidence = safe_prob
            else:
                display_label = label_names[predicted_class] if predicted_class < len(label_names) else f"Class_{predicted_class}"
                display_confidence = confidence

            print(f"[{language}] Text: '{text[:80]}{'...' if len(text) > 80 else ''}'")
            print(f"  -> {display_label} (confidence: {display_confidence:.4f})")

            # Show probabilities
            if num_labels == 3:
                print(f"  Probabilities: NSFW={nsfw_prob:.3f}, Safe={safe_prob:.3f}")
            else:
                probs_str = ", ".join([f"{label_names[j] if j < len(label_names) else f'Class_{j}'}={predictions[0][j].item():.3f}"
                                     for j in range(num_labels)])
                print(f"  Probabilities: {probs_str}")
            print()

    except Exception as e:
        # Smoke test only: report the failure instead of aborting the notebook.
        print(f"Inference test failed: {e}")
        import traceback
        traceback.print_exc()

# PROGRAM EXECUTION

In [55]:
# Entry point: run the full pipeline, then the inference smoke test.
if __name__ == "__main__":
    try:
        # Run the main pipeline
        main()

        # Smoke-test the model saved by main(); the path matches main()'s OUTPUT_DIR.
        test_inference("models/xlm_roberta_classification")

    except KeyboardInterrupt:
        # Manual interruption is reported without a traceback.
        print("\nTraining interrupted by user.")
    except Exception as e:
        # Any other failure is printed with its traceback instead of propagating,
        # so the final message below is still reached.
        print(f"Error during execution: {e}")
        import traceback
        traceback.print_exc()

    print("\nProgram execution completed.")

Starting XLM-RoBERTa Multilingual Text Classification Pipeline
Using model: FacebookAI/xlm-roberta-base
Output directory: models/xlm_roberta_classification
Loading multilingual dataset from dataset.csv...
Error: Dataset file 'dataset.csv' not found!
Creating a sample multilingual dataset for demonstration...
Sample multilingual sentiment dataset created and saved as 'dataset.csv'
Dataset includes English, Spanish, Tagalog, French, and German samples
Labels: 0=Negative, 1=Neutral, 2=Positive
Total samples: 75 across 5 languages
Splitting dataset: 20.0% for validation...
Training set: 60 samples
Validation set: 15 samples
Loading XLM-RoBERTa tokenizer and creating tokenized datasets...
Tokenizer vocabulary size: 250002
Model max length: 512
Tokenizing training dataset...


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Tokenizing validation dataset...


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Tokenization complete!
Sample tokenized text length: 15
Initializing XLM-RoBERTa model for multilingual classification training...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Model loaded with 278,045,955 parameters
Model architecture: 768 hidden, 12 layers
Starting training...


Step,Training Loss,Validation Loss


Saving model to models/xlm_roberta_classification...
Evaluating XLM-RoBERTa model performance...


Metrics saved to metrics/xlm_roberta_metrics.txt
Validation Accuracy: 0.8000
F1-Score (Macro): 0.7619
F1-Score (Weighted): 0.7937
Exporting XLM-RoBERTa model to ONNX format...


  torch.onnx.export(
  inverted_mask = torch.tensor(1.0, dtype=dtype) - expanded_mask


ONNX model exported to: models/xlm_roberta_classification_model.onnx
ONNX model size: 1060.97 MB
Converting XLM-RoBERTa PyTorch model to TensorFlow...


All PyTorch model weights were used when initializing TFXLMRobertaForSequenceClassification.

All the weights of TFXLMRobertaForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLMRobertaForSequenceClassification for predictions without further training.


Saved intermediate TensorFlow model to models/xlm_roberta_classification/tf_saved_model
TFLite model successfully exported to: models/xlm_roberta_classification_model.tflite
TFLite model size: 530.62 MB

XLM-RoBERTa Multilingual Classification Training Complete!
✅ ONNX model: models/xlm_roberta_classification_model.onnx
✅ TFLite model: models/xlm_roberta_classification_model.tflite

Model checkpoints: models/xlm_roberta_classification
Metrics: metrics/xlm_roberta_metrics.txt
Final validation accuracy: 0.8000

Testing trained XLM-RoBERTa multilingual model...
[English] Text: 'I absolutely love this amazing product, it's fantastic!'
  -> Safe (confidence: 0.6567)
  Probabilities: NSFW=0.343, Safe=0.657

[English] Text: 'The product is okay, nothing particularly special about it.'
  -> Safe (confidence: 0.6635)
  Probabilities: NSFW=0.337, Safe=0.663

[English] Text: 'Terrible quality, I'm completely disappointed with this purchase.'
  -> Safe (confidence: 0.6517)
  Probabilities: NSFW=0.