In [22]:
#import shutil
#shutil.rmtree('/content/models', ignore_errors=True)

# Microsoft XtremeDistil RoBERTa

In [23]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import Dataset
import tempfile
import subprocess
import sys

# IMPORT LIBRARIES AND SETUP

In [24]:
def install_requirements():
    required_packages = [
        'transformers[torch]',
        'datasets',
        'torch',
        'pandas',
        'scikit-learn',
        'onnx',
        'onnxruntime',
        'optimum[onnxruntime]',  # This enables ORTModelForSequenceClassification
        'tensorflow',
    ]

    for package in required_packages:
        try:
            __import__(package.split('[')[0])
        except ImportError:
            print(f"Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])

In [25]:
print("Checking and installing dependencies...")
install_requirements()

Checking and installing dependencies...
Installing scikit-learn...


In [26]:
try:
    from optimum.onnxruntime import ORTModelForSequenceClassification
    from optimum.onnxruntime.configuration import OptimizationConfig
    ONNX_AVAILABLE = True
except ImportError:
    print("Optimum ONNX Runtime not available, ONNX export will be limited")
    ONNX_AVAILABLE = False

Optimum ONNX Runtime not available, ONNX export will be limited


In [27]:
print("All dependencies loaded successfully!")

All dependencies loaded successfully!


# LOAD AND PREPARE DATASET

In [28]:
def load_dataset(csv_path='dataset.csv'):
    print(f"Loading dataset from {csv_path}...")

    try:
        df = pd.read_csv(csv_path)
        print(f"Dataset loaded successfully. Shape: {df.shape}")

        # Validate required columns
        required_cols = ['text', 'label']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Clean and validate data
        df = df.dropna(subset=['text', 'label'])
        df['text'] = df['text'].astype(str)
        df['label'] = df['label'].astype(int)

        # Check label range - XtremeDistil can handle multi-class classification
        unique_labels = sorted(df['label'].unique())
        num_labels = len(unique_labels)

        # Ensure labels are sequential starting from 0
        if unique_labels != list(range(num_labels)):
            print("Warning: Labels are not sequential starting from 0. Remapping...")
            label_mapping = {old_label: new_label for new_label, old_label in enumerate(unique_labels)}
            df['label'] = df['label'].map(label_mapping)
            print(f"Label mapping: {label_mapping}")

        print(f"Dataset validation complete. Clean shape: {df.shape}")
        print(f"Number of classes: {num_labels}")
        print(f"Label distribution:\n{df['label'].value_counts().sort_index()}")

        return df

    except FileNotFoundError:
        print(f"Error: Dataset file '{csv_path}' not found!")
        print("Creating a sample multilingual dataset for demonstration...")
        return create_sample_dataset()

In [29]:
def create_sample_dataset():
    sample_data = {
        'text': [
            # Positive sentiment (label 0) - 13 items
            'I love this product, it works perfectly!',
            'This is an amazing experience, highly recommended!',
            'Great quality and fast delivery service.',
            'Excellent customer support and user-friendly interface.',
            'Outstanding performance and great value for money.',
            'Me encanta este producto, funciona perfectamente.',
            'Excelente calidad y servicio rápido.',
            'Gran experiencia de usuario, muy recomendado.',
            'Napakaganda ng produktong ito, sulit na sulit.',
            'Magandang serbisyo at mabilis na delivery.',
            'Napakahusay ng customer support nila.',
            'Produit excellent, je le recommande vivement.',
            'Service client formidable et livraison rapide.',

            # Neutral sentiment (label 1) - 10 items
            'The product is okay, nothing special about it.',
            'Average quality, meets basic requirements.',
            'It works as described, no complaints.',
            'Standard features, typical for this price range.',
            'Normal delivery time, packaging was fine.',
            'El producto está bien, nada especial.',
            'Calidad promedio, cumple con lo básico.',
            'Okay lang ang produkto, walang masama.',
            'Standard quality, normal lang.',
            'Produit correct, sans plus.',

            # Negative sentiment (label 2) - 13 items
            'Terrible product, completely waste of money.',
            'Poor quality, broke after one day of use.',
            'Worst customer service ever experienced.',
            'Delivery was extremely slow and packaging damaged.',
            'Would not recommend, very disappointing.',
            'Producto terrible, muy mala calidad.',
            'Servicio al cliente pésimo, muy decepcionante.',
            'Entrega lenta y producto dañado.',
            'Sobrang pangit ng produkto, sayang ang pera.',
            'Mabagal na delivery at sira pa ang item.',
            'Hindi ko irerekumenda, napakasama.',
            'Produit horrible, très décevant.',
            'Qualité épouvantable, service client nul.',
            'Livraison lente et produit défectueux.'  # This was the missing item
        ],
        'label': (
            [0] * 13 +  # 13 positive samples
            [1] * 10 +  # 10 neutral samples
            [2] * 13    # 13 negative samples
        )
    }

    # Safety check to ensure equal lengths
    if len(sample_data['text']) != len(sample_data['label']):
        print(f"WARNING: Length mismatch! Text: {len(sample_data['text'])}, Label: {len(sample_data['label'])}")
        min_len = min(len(sample_data['text']), len(sample_data['label']))
        sample_data['text'] = sample_data['text'][:min_len]
        sample_data['label'] = sample_data['label'][:min_len]

    df = pd.DataFrame(sample_data)
    df.to_csv('dataset.csv', index=False)
    print("Sample multilingual sentiment dataset created and saved as 'dataset.csv'")
    print(f"Dataset shape: {df.shape}")
    return df

In [30]:
def split_dataset(df, test_size=0.2, random_state=42):
    print(f"Splitting dataset: {test_size*100}% for validation...")

    X_train, X_val, y_train, y_val = train_test_split(
        df['text'].tolist(),
        df['label'].tolist(),
        test_size=test_size,
        random_state=random_state,
        stratify=df['label']
    )

    print(f"Training set: {len(X_train)} samples")
    print(f"Validation set: {len(X_val)} samples")

    return X_train, X_val, y_train, y_val

# TOKENIZATION AND PREPROCESSING

In [31]:
def create_tokenized_datasets(X_train, X_val, y_train, y_val, model_name):
    """Tokenize the datasets using the XtremeDistil tokenizer"""
    print("Loading XtremeDistil tokenizer and creating tokenized datasets...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # XtremeDistil uses BERT-style tokenization
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.unk_token if tokenizer.unk_token is not None else '[PAD]'

    # Create datasets
    train_dataset = Dataset.from_dict({
        'text': X_train,
        'labels': y_train
    })

    val_dataset = Dataset.from_dict({
        'text': X_val,
        'labels': y_val
    })

    def tokenize_function(examples):
        return tokenizer(
            examples['text'],
            truncation=True,
            padding=False,  # Will be handled by data collator
            max_length=128  # XtremeDistil works well with shorter sequences
        )

    # Tokenize datasets
    train_tokenized = train_dataset.map(tokenize_function, batched=True)
    val_tokenized = val_dataset.map(tokenize_function, batched=True)

    print("Tokenization complete!")
    print(f"Sample tokenized text length: {len(train_tokenized[0]['input_ids'])}")

    return train_tokenized, val_tokenized, tokenizer

# MODEL TRAINING

In [32]:
def train_model(train_dataset, val_dataset, tokenizer, model_name, output_dir, num_labels):
    """Train the XtremeDistil model for multilingual classification"""
    print("Initializing XtremeDistil model for multilingual classification training...")

    # Load model with appropriate number of labels
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        problem_type="single_label_classification"
    )

    # Ensure model uses the correct pad_token_id
    if tokenizer.pad_token_id is not None:
        model.config.pad_token_id = tokenizer.pad_token_id

    # Data collator
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Training arguments optimized for XtremeDistil
    training_args_dict = {
        'output_dir': output_dir,
        'num_train_epochs': 4,  # Slightly more epochs due to distilled model
        'per_device_train_batch_size': 16,  # Larger batch size (smaller model)
        'per_device_eval_batch_size': 16,
        'learning_rate': 5e-5,  # Higher learning rate for distilled models
        'weight_decay': 0.01,
        'warmup_ratio': 0.1,  # Warmup ratio instead of steps
        'logging_dir': f'{output_dir}/logs',
        'logging_steps': 10,
        'save_total_limit': 2,
        'load_best_model_at_end': True,
        'metric_for_best_model': "eval_f1_macro",
        'greater_is_better': True,
        'report_to': [],
        'seed': 42,
        'dataloader_num_workers': 0,
        'remove_unused_columns': True,
        'fp16': True,  # Mixed precision for efficiency
        'dataloader_pin_memory': False,
        'gradient_checkpointing': False,  # XtremeDistil is small enough
    }

    # Add version-specific parameters
    if hasattr(TrainingArguments, 'eval_strategy'):
        training_args_dict['eval_strategy'] = "epoch"
        training_args_dict['save_strategy'] = "epoch"
    else:
        training_args_dict['evaluation_strategy'] = "epoch"
        training_args_dict['save_strategy'] = "epoch"

    training_args = TrainingArguments(**training_args_dict)

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred

        # Handle different prediction formats
        if isinstance(predictions, tuple):
            predictions = predictions[0]

        # Convert to numpy array if it's a tensor
        if hasattr(predictions, 'numpy'):
            predictions = predictions.numpy()

        predictions = np.argmax(predictions, axis=1)

        # Calculate metrics
        accuracy = accuracy_score(labels, predictions)
        f1_macro = f1_score(labels, predictions, average='macro')
        f1_weighted = f1_score(labels, predictions, average='weighted')

        return {
            'accuracy': accuracy,
            'f1_macro': f1_macro,
            'f1_weighted': f1_weighted,
        }

    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    print("Starting training...")
    trainer.train()

    # Save the best model
    print(f"Saving model to {output_dir}...")
    trainer.save_model()
    tokenizer.save_pretrained(output_dir)

    return trainer, model

# MODEL EVALUATION

In [33]:
def evaluate_model(trainer, X_val, y_val, output_dir, num_labels):
    print("Evaluating model performance...")

    # Get predictions
    eval_results = trainer.evaluate()

    # Get detailed predictions for classification report
    predictions = trainer.predict(trainer.eval_dataset)

    # Extract predictions from the prediction object
    if hasattr(predictions, 'predictions'):
        preds = predictions.predictions
    else:
        preds = predictions[0]

    # Convert to numpy array if it's a tensor
    if hasattr(preds, 'numpy'):
        preds = preds.numpy()

    y_pred = np.argmax(preds, axis=1)

    # Generate classification report
    if num_labels == 2:
        target_names = ['Safe', 'NSFW']
    elif num_labels == 3:
        target_names = ['Positive', 'Neutral', 'Negative']
    else:
        target_names = [f'Class_{i}' for i in range(num_labels)]

    report = classification_report(
        y_val,
        y_pred,
        target_names=target_names,
        digits=4
    )

    # Calculate additional metrics
    accuracy = accuracy_score(y_val, y_pred)
    f1_macro = f1_score(y_val, y_pred, average='macro')
    f1_weighted = f1_score(y_val, y_pred, average='weighted')

    # Prepare metrics text
    metrics_text = f"""XtremeDistil Multilingual Classification Model Evaluation Results
{'='*70}

Model: microsoft/xtremedistil-l6-h384-uncased
Task: Multilingual Text Classification ({num_labels} classes)
Architecture: 6 layers, 384 hidden units (Distilled)

Performance Metrics:
{'-'*30}
Accuracy: {accuracy:.4f}
F1-Score (Macro): {f1_macro:.4f}
F1-Score (Weighted): {f1_weighted:.4f}

Classification Report:
{report}

Training Results:
{'-'*30}
"""

    for key, value in eval_results.items():
        if isinstance(value, (int, float)):
            metrics_text += f"{key}: {value:.4f}\n"

    # Save metrics
    os.makedirs('metrics', exist_ok=True)
    metrics_path = 'metrics/xtremedistil_metrics.txt'

    with open(metrics_path, 'w') as f:
        f.write(metrics_text)

    print(f"Metrics saved to {metrics_path}")
    print(f"Validation Accuracy: {accuracy:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")

    return accuracy, report

# ONNX EXPORT

In [34]:
def export_to_onnx(model_dir, onnx_path):
    print("Exporting XtremeDistil model to ONNX format...")

    try:
        # Load the trained model
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

        # Create dummy input with multilingual sample
        dummy_input = tokenizer(
            "This is a sample multilingual text for ONNX export",
            return_tensors="pt",
            max_length=128,
            padding="max_length",
            truncation=True
        )

        # Export to ONNX with optimization for XtremeDistil
        os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
        torch.onnx.export(
            model,
            tuple(dummy_input.values()),
            onnx_path,
            export_params=True,
            opset_version=14,
            do_constant_folding=True,
            input_names=['input_ids', 'attention_mask'],
            output_names=['logits'],
            dynamic_axes={
                'input_ids': {0: 'batch_size', 1: 'sequence'},
                'attention_mask': {0: 'batch_size', 1: 'sequence'},
                'logits': {0: 'batch_size'}
            }
        )

        print(f"ONNX model exported to: {onnx_path}")

        # Get model size
        model_size = os.path.getsize(onnx_path) / (1024 * 1024)
        print(f"ONNX model size: {model_size:.2f} MB")

        return True

    except Exception as e:
        print(f"ONNX export failed: {e}")
        import traceback
        traceback.print_exc()
        return False

# TENSORFLOW LITE EXPORT

In [35]:
def export_to_tflite_from_pt(model_dir, tflite_path):
    try:
        import tensorflow as tf
        from transformers import TFAutoModelForSequenceClassification

        print("Converting XtremeDistil PyTorch model to TensorFlow...")

        # Load and convert to TensorFlow
        tf_model = TFAutoModelForSequenceClassification.from_pretrained(
            model_dir,
            from_pt=True
        )

        # Save as TensorFlow SavedModel
        tf_saved_model_dir = os.path.join(model_dir, "tf_saved_model")
        tf.saved_model.save(tf_model, tf_saved_model_dir)
        print(f"Saved intermediate TensorFlow model to {tf_saved_model_dir}")

        # Convert to TFLite with optimizations
        converter = tf.lite.TFLiteConverter.from_saved_model(tf_saved_model_dir)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        # Additional optimizations for mobile deployment
        converter.target_spec.supported_types = [tf.float16]

        tflite_model = converter.convert()

        # Save TFLite model
        os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
        with open(tflite_path, "wb") as f:
            f.write(tflite_model)

        # Get model size
        model_size = os.path.getsize(tflite_path) / (1024 * 1024)
        print(f"TFLite model successfully exported to: {tflite_path}")
        print(f"TFLite model size: {model_size:.2f} MB")

        return True

    except Exception as e:
        print(f"TensorFlow Lite export failed: {e}")
        import traceback
        traceback.print_exc()
        return False

# MAIN

In [36]:
def main():
    print("Starting XtremeDistil Multilingual Text Classification Pipeline")
    print("="*70)

    # Configuration - Updated to use XtremeDistil model
    MODEL_NAME = "microsoft/xtremedistil-l6-h384-uncased"
    OUTPUT_DIR = "models/xtremedistil_classification"
    ONNX_PATH = "models/xtremedistil_classification_model.onnx"
    TFLITE_PATH = "models/xtremedistil_classification_model.tflite"

    # Create output directories
    os.makedirs("models", exist_ok=True)
    os.makedirs("metrics", exist_ok=True)

    print(f"Using model: {MODEL_NAME}")
    print(f"Output directory: {OUTPUT_DIR}")

    # Step 1: Load dataset
    df = load_dataset()
    num_labels = len(df['label'].unique())

    # Step 2: Split dataset
    X_train, X_val, y_train, y_val = split_dataset(df)

    # Step 3: Create tokenized datasets
    train_dataset, val_dataset, tokenizer = create_tokenized_datasets(
        X_train, X_val, y_train, y_val, MODEL_NAME
    )

    # Step 4: Train model
    trainer, model = train_model(
        train_dataset, val_dataset, tokenizer, MODEL_NAME, OUTPUT_DIR, num_labels
    )

    # Step 5: Evaluate model
    accuracy, report = evaluate_model(trainer, X_val, y_val, OUTPUT_DIR, num_labels)

    # Step 6: Export to ONNX
    onnx_success = export_to_onnx(OUTPUT_DIR, ONNX_PATH)

    # Step 7: Export to TFLite
    tflite_success = export_to_tflite_from_pt(OUTPUT_DIR, TFLITE_PATH)

    # Final output
    print("\n" + "="*70)
    print("XtremeDistil Multilingual Classification Training Complete!")

    if onnx_success:
        print(f"✅ ONNX model: {ONNX_PATH}")
    else:
        print("❌ ONNX export: FAILED")

    if tflite_success:
        print(f"✅ TFLite model: {TFLITE_PATH}")
    else:
        print("❌ TFLite export: FAILED")

    print(f"\nModel checkpoints: {OUTPUT_DIR}")
    print(f"Metrics: metrics/xtremedistil_metrics.txt")
    print(f"Final validation accuracy: {accuracy:.4f}")

# INFERENCE

In [37]:
def test_inference(model_dir, test_texts=None):
    if test_texts is None:
        test_texts = [
            # English
            "I absolutely love this product, it's amazing!",  # Positive
            "The product is okay, nothing special.",  # Neutral
            "Terrible quality, completely disappointed.",  # Negative

            # Spanish
            "Me encanta este producto, es fantástico.",  # Positive
            "El producto está bien, nada especial.",  # Neutral
            "Calidad terrible, muy decepcionante.",  # Negative

            # Tagalog
            "Napakaganda ng produktong ito, sulit na sulit!",  # Positive
            "Okay lang ang produkto, walang masama.",  # Neutral
            "Sobrang pangit ng produkto, sayang ang pera.",  # Negative

            # French
            "J'adore ce produit, il est formidable!",  # Positive
            "Le produit est correct, sans plus.",  # Neutral
            "Qualité horrible, très décevant."  # Negative
        ]

    print("\nTesting trained XtremeDistil multilingual model...")

    try:
        # Load model and tokenizer
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        tokenizer = AutoTokenizer.from_pretrained(model_dir)

        model.eval()

        # Determine label names based on number of labels
        num_labels = model.config.num_labels
        if num_labels == 2:
            label_names = ["Safe", "NSFW"]
        elif num_labels == 3:
            label_names = ["Positive", "Neutral", "Negative"]
        else:
            label_names = [f"Class_{i}" for i in range(num_labels)]

        for i, text in enumerate(test_texts):
            # Tokenize
            inputs = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=128,
                padding=True
            )

            # Predict
            with torch.no_grad():
                outputs = model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predicted_class = torch.argmax(predictions, dim=-1).item()
                confidence = predictions[0][predicted_class].item()

            # Map predictions to labels
            label = label_names[predicted_class] if predicted_class < len(label_names) else f"Class_{predicted_class}"

            print(f"Text {i+1}: '{text}'")
            print(f"  -> {label} (confidence: {confidence:.4f})")

            # Show all probabilities
            probs_str = ", ".join([f"{label_names[j] if j < len(label_names) else f'Class_{j}'}={predictions[0][j].item():.3f}"
                                 for j in range(num_labels)])
            print(f"  Probabilities: {probs_str}")
            print()

    except Exception as e:
        print(f"Inference test failed: {e}")
        import traceback
        traceback.print_exc()

# PROGRAM EXECUTION

In [38]:
if __name__ == "__main__":
    try:
        # Run the main pipeline
        main()

        # Optional: Test inference
        test_inference("models/xtremedistil_classification")

    except KeyboardInterrupt:
        print("\nTraining interrupted by user.")
    except Exception as e:
        print(f"Error during execution: {e}")
        import traceback
        traceback.print_exc()

    print("\nProgram execution completed.")

Starting XtremeDistil Multilingual Text Classification Pipeline
Using model: microsoft/xtremedistil-l6-h384-uncased
Output directory: models/xtremedistil_classification
Loading dataset from dataset.csv...
Error: Dataset file 'dataset.csv' not found!
Creating a sample multilingual dataset for demonstration...
Sample multilingual sentiment dataset created and saved as 'dataset.csv'
Dataset shape: (36, 2)
Splitting dataset: 20.0% for validation...
Training set: 28 samples
Validation set: 8 samples
Loading XtremeDistil tokenizer and creating tokenized datasets...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/526 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/28 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Tokenization complete!
Sample tokenized text length: 17
Initializing XtremeDistil model for multilingual classification training...


pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/xtremedistil-l6-h384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Starting training...


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,F1 Weighted
1,No log,1.098022,0.375,0.181818,0.204545
2,No log,1.095703,0.375,0.181818,0.204545
3,No log,1.093384,0.5,0.366667,0.4125
4,No log,1.092529,0.5,0.366667,0.4125


Saving model to models/xtremedistil_classification...
Evaluating model performance...


Metrics saved to metrics/xtremedistil_metrics.txt
Validation Accuracy: 0.5000
F1-Score (Macro): 0.3667
Exporting XtremeDistil model to ONNX format...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  torch.onnx.export(
  inverted_mask = torch.tensor(1.0, dtype=dtype) - expanded_mask
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


ONNX model exported to: models/xtremedistil_classification_model.onnx
ONNX model size: 86.80 MB
Converting XtremeDistil PyTorch model to TensorFlow...


All PyTorch model weights were used when initializing TFBertForSequenceClassification.

All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


Saved intermediate TensorFlow model to models/xtremedistil_classification/tf_saved_model
TFLite model successfully exported to: models/xtremedistil_classification_model.tflite
TFLite model size: 43.47 MB

XtremeDistil Multilingual Classification Training Complete!
✅ ONNX model: models/xtremedistil_classification_model.onnx
✅ TFLite model: models/xtremedistil_classification_model.tflite

Model checkpoints: models/xtremedistil_classification
Metrics: metrics/xtremedistil_metrics.txt
Final validation accuracy: 0.5000

Testing trained XtremeDistil multilingual model...
Text 1: 'I absolutely love this product, it's amazing!'
  -> Positive (confidence: 0.3483)
  Probabilities: Positive=0.348, Neutral=0.323, Negative=0.329

Text 2: 'The product is okay, nothing special.'
  -> Negative (confidence: 0.3390)
  Probabilities: Positive=0.331, Neutral=0.330, Negative=0.339

Text 3: 'Terrible quality, completely disappointed.'
  -> Negative (confidence: 0.3456)
  Probabilities: Positive=0.338, Neutr