In [1]:
import torch
# Sanity check: print whether PyTorch reports its MPS backend as available in this
# kernel, before running the (slow) prediction cells below.
print(torch.backends.mps.is_available()) 

True


In [3]:
import json
from xcomet.deberta_encoder import XCOMETLite
from typing import Optional

def evaluate_fluency_with_xcomet(
    input_path: str,
    output_path: Optional[str] = "xcomet_fluency_scores.json",
    model_name: str = "myyycroft/XCOMET-lite",
    batch_size: int = 2,
    num_workers: int = 0,
    accelerator: str = "auto",
    devices: Optional[list] = None,
) -> float:
    """
    Loads input data, runs XCOMET fluency prediction, and optionally saves scores.

    The parsed JSON content is handed to ``model.predict`` unchanged; its exact
    schema is whatever ``XCOMETLite.predict`` expects (not validated here).

    Args:
        input_path (str): Path to the JSON input file.
        output_path (str, optional): Where to save the detailed scores. Set to None
            (or an empty string) to skip saving. Note that the default path is the
            same for every call, so repeated calls overwrite each other unless a
            distinct path is passed.
        model_name (str): Hugging Face model name or local path.
        batch_size (int): Batch size for prediction.
        num_workers (int): Number of worker processes (0 for none).
        accelerator (str): Accelerator type (e.g., "auto", "cpu", "gpu").
        devices (list, optional): List of device IDs. Defaults to ``[0]`` when None.

    Returns:
        float: The average fluency score (arithmetic mean of ``result.scores``).

    Raises:
        ValueError: If the input file contains no data, or prediction yields no scores.
    """
    # Resolve the default here instead of in the signature: a list literal used as
    # a default value is created once and shared by every call.
    if devices is None:
        devices = [0]

    # Load data
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Fail fast, before the model is loaded, rather than hitting a
    # ZeroDivisionError when averaging later.
    if not data:
        raise ValueError(f"No input data found in {input_path!r}; nothing to score.")

    # Load model
    model = XCOMETLite.from_pretrained(model_name)

    # Predict
    result = model.predict(
        data,
        batch_size=batch_size,
        num_workers=num_workers,
        accelerator=accelerator,
        devices=devices,
    )

    scores = result.scores
    if len(scores) == 0:
        raise ValueError(
            f"Prediction returned no scores for {input_path!r}; cannot compute an average."
        )

    # Save scores
    if output_path:
        with open(output_path, "w", encoding="utf-8") as out:
            json.dump(scores, out, indent=2)

    # Print and return average
    avg_score = sum(scores) / len(scores)
    print(f"Average Fluency Score: {avg_score}")
    return avg_score

In [4]:
# Pass an explicit output_path: the function's default ("xcomet_fluency_scores.json")
# is shared by every call, so a later run would overwrite this run's per-segment scores.
evaluate_fluency_with_xcomet(
    "xcomet_input_data_de.json",
    output_path="xcomet_fluency_scores_de.json",
)

Encoder model frozen.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|██████████| 200/200 [01:12<00:00,  2.74it/s]


Average Fluency Score: 0.8999665227532386


0.8999665227532386

In [5]:
# Pass an explicit output_path: the function's default ("xcomet_fluency_scores.json")
# is shared by every call, so a later run would overwrite this run's per-segment scores.
evaluate_fluency_with_xcomet(
    "xcomet_input_data_en.json",
    output_path="xcomet_fluency_scores_en.json",
)

Encoder model frozen.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|██████████| 200/200 [00:43<00:00,  4.60it/s]


Average Fluency Score: 0.8715424510091543


0.8715424510091543

In [6]:
# Pass an explicit output_path: the function's default ("xcomet_fluency_scores.json")
# is shared by every call, so a later run would overwrite this run's per-segment scores.
evaluate_fluency_with_xcomet(
    "xcomet_input_data_de_base.json",
    output_path="xcomet_fluency_scores_de_base.json",
)

Encoder model frozen.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|██████████| 200/200 [01:06<00:00,  3.00it/s]


Average Fluency Score: 0.9424139831960201


0.9424139831960201

In [7]:
# Pass an explicit output_path: the function's default ("xcomet_fluency_scores.json")
# is shared by every call, so this run would overwrite any scores saved there earlier.
evaluate_fluency_with_xcomet(
    "xcomet_input_data_en_base.json",
    output_path="xcomet_fluency_scores_en_base.json",
)

Encoder model frozen.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|██████████| 200/200 [00:43<00:00,  4.60it/s]


Average Fluency Score: 0.6290126404166222


0.6290126404166222