In [None]:
!pip install transformers datasets

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, concatenate_datasets
import torch


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained formality classifier and the tokenizer published with it.
model_name = "s-nlp/xlmr_formality_classifier"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model = model.to(device)
model.eval()  # evaluation mode: this notebook only runs inference

# The correlation analysis uses every annotated sentence, so the train and
# test splits are merged into a single dataset.
ds_dict = load_dataset("osyvokon/pavlick-formality-scores")
dataset = concatenate_datasets([ds_dict[split] for split in ("train", "test")])

In [3]:
def predict_formality(example):
    """Score sentences with the classifier's probability of the "formal" class.

    Works both as a per-example and as a batched ``Dataset.map`` function.

    Args:
        example: Mapping with a ``"sentence"`` entry holding either a single
            string (per-example mode) or a sequence of strings (batched mode).

    Returns:
        ``{"model_pred": float}`` for a single sentence, or
        ``{"model_pred": list[float]}`` (one value per sentence, same order)
        for a batch.
    """
    sentences = example["sentence"]
    is_single = isinstance(sentences, str)

    # Always tokenize a list so the encoded tensors have a leading batch axis;
    # padding=True pads every sentence to the longest one in this batch.
    inputs = tokenizer(
        [sentences] if is_single else list(sentences),
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Column of the softmax output that corresponds to the "formal" label.
    formal_idx = model.config.label2id["formal"]
    formal_probs = torch.softmax(logits, dim=-1)[:, formal_idx]

    if is_single:
        return {"model_pred": formal_probs[0].item()}
    return {"model_pred": formal_probs.tolist()}


# Batched mapping runs one forward pass per 32 sentences instead of one per
# sentence; the explicit batch_size keeps each padded batch small enough for
# a single forward pass (the library default for batched maps is much larger).
dataset = dataset.map(predict_formality, batched=True, batch_size=32)


Map:   0%|          | 0/11274 [00:00<?, ? examples/s]

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

# import pandas as pd

# df = dataset.to_pandas()

# df = df[["sentence", "domain", "avg_score", "model_pred"]]


# output_path = "/content/drive/MyDrive/xlmr_predictions.csv"
# df.to_csv(output_path, index=False)

# print("Predictions saved")


In [None]:
from scipy.stats import spearmanr


# Read each column once; the human scores and model predictions are compared
# by rank correlation over the whole dataset first.
human_scores = dataset["avg_score"]
model_scores = dataset["model_pred"]

rho_total, pval_total = spearmanr(human_scores, model_scores)
print(f"\n🔹 Overall Spearman R: {rho_total:.4f} (p = {pval_total:.4e})")


print("\nSpearman R per domain:")
# Sort the domain names: iterating a bare set of strings yields an order that
# can change between kernel restarts, which made the report order unstable.
domains = sorted(set(dataset["domain"]))
for domain in domains:
    # Bind the current domain as a default argument so the predicate does not
    # depend on the loop variable's later values.
    subset = dataset.filter(lambda x, wanted=domain: x["domain"] == wanted)
    rho, pval = spearmanr(subset["avg_score"], subset["model_pred"])
    print(f"{domain:<10} → Spearman R = {rho:.4f} (p = {pval:.4e})")



🔹 Overall Spearman R: 0.6593 (p = 0.0000e+00)

📊 Spearman R per domain:


Filter:   0%|          | 0/11274 [00:00<?, ? examples/s]

blog       → Spearman R = 0.4082 (p = 4.5800e-74)


Filter:   0%|          | 0/11274 [00:00<?, ? examples/s]

news       → Spearman R = 0.1060 (p = 2.1654e-08)


Filter:   0%|          | 0/11274 [00:00<?, ? examples/s]

answers    → Spearman R = 0.7073 (p = 0.0000e+00)


Filter:   0%|          | 0/11274 [00:00<?, ? examples/s]

email      → Spearman R = 0.5061 (p = 2.6837e-111)
