In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Locations of the fine-tuned classifier checkpoint and its tokenizer,
# relative to the notebook's working directory.
model_path = "model/tuned-bert"
tokenizer_path = "model/tuned-bert-tokenizer"

# Run on the GPU when CUDA reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
model = BertForSequenceClassification.from_pretrained(model_path)

# Move the weights to the chosen device and switch to evaluation mode.
model.to(device)
model.eval()

def predict(loan_word, original_word):
    """Classify one (loan_word, original_word) pair with the loaded model.

    The two words are joined into the single string
    ``"<loan_word> [SEP] <original_word>"``, tokenized (padded/truncated to
    128 tokens) and passed through ``model`` without gradient tracking.

    Args:
        loan_word: Candidate word, interpolated as the first segment.
        original_word: Source word, interpolated as the second segment.

    Returns:
        int: Index of the largest logit along dim 1.
        NOTE(review): the meaning of each class index depends on how the
        checkpoint was trained -- confirm before interpreting it.

    Side effects:
        Prints the raw logits tensor to stdout.
    """
    pair_text = f"{loan_word} [SEP] {original_word}"
    batch = tokenizer(
        pair_text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )

    # Inputs must live on the same device as the model.
    ids = batch["input_ids"].to(device)
    mask = batch["attention_mask"].to(device)

    with torch.no_grad():
        logits = model(input_ids=ids, attention_mask=mask).logits

    print(logits)
    return logits.argmax(dim=1).item()

# Smoke test: run the single-pair predictor on one hand-picked example
# and print the class index it returns.
loan_word = "हैवान"
original_word = "بیابان"

predicted_class = predict(loan_word, original_word)
print(f"Prediction: {predicted_class}")


tensor([[ 0.4959, -0.6561]])
Prediction: 0


In [17]:
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import os

# (lang1, lang2) pairs to evaluate. Each pair is formatted as "lang1-lang2"
# to build the test-set path in read_language and the "Language Pair" column
# of the results table.
language_pairs = [
    ("Azerbaijani", "Arabic"),
    ("Catalan", "Arabic"),
    ("Chinese", "English"),
    ("English", "French"),
    ("English", "German"),
    ("Finnish", "Swedish"),
    ("German", "French"),
    ("German", "Italian"),
    ("Hindi", "Persian"),
    ("Hungarian", "German"),
    ("Indonesian", "Dutch"),
    ("Kazakh", "Russian"),
    ("Persian", "Arabic"),
    ("Polish", "French"),
    ("Romanian", "French"),
    ("Romanian", "Hungarian"),
]
# Maps the string values of the CSV "label" column to the integer targets
# used for scoring.
# NOTE(review): "loan" shares class 0 with "random" and "synonym", and only
# "hard_negative" is class 1 -- confirm this matches the encoding the
# checkpoint was trained with.
label_mapping = {
    "random": 0,
    "hard_negative": 1,
    "loan": 0,
    "synonym": 0
}

def read_language(lang1, lang2, data_dir="data/production_train_test", mapping=None):
    """Load the balanced production test set for one language pair.

    Reads
    ``{data_dir}/{lang1}-{lang2}/balanced/{lang1}-{lang2}-test_production_balanced.csv``,
    drops leftover index columns, tags every row with the pair name and
    converts the string ``label`` column to integers.

    Args:
        lang1: First language name of the pair (first part of the folder name).
        lang2: Second language name of the pair.
        data_dir: Root directory containing the per-pair folders.
        mapping: Dict from raw label string to integer class. Defaults to the
            notebook-level ``label_mapping``.

    Returns:
        pandas.DataFrame with an added ``language_pair`` column and an integer
        ``label`` column, or ``None`` (after printing a message) when the CSV
        does not exist.

    Raises:
        ValueError: If the ``label`` column holds a value absent from
            ``mapping``. Without this check the failure would surface as an
            opaque NaN-to-int cast error.
    """
    if mapping is None:
        mapping = label_mapping

    pair = f"{lang1}-{lang2}"
    file_path = f"{data_dir}/{pair}/balanced/{pair}-test_production_balanced.csv"
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    df = pd.read_csv(file_path)
    # errors="ignore" already skips columns that are absent, so no pre-filter is needed.
    df = df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors="ignore")
    df["language_pair"] = pair

    mapped = df["label"].map(mapping)
    unmapped = mapped.isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, "label"].astype(str).unique())
        raise ValueError(f"Unmapped label value(s) in {file_path}: {unknown}")
    df["label"] = mapped.astype(int)
    return df


In [18]:
def predict_batch(loan_words, original_words, batch_size=32):
    """Classify many (loan_word, original_word) pairs in memory-bounded chunks.

    Each pair is rendered as ``"<loan_word> [SEP] <original_word>"``. The
    texts are tokenized and passed through ``model`` ``batch_size`` at a time,
    so peak memory no longer grows with the size of the whole dataset.
    Padding is applied per chunk (to that chunk's longest sequence, capped at
    128 tokens); padded positions are masked out via ``attention_mask``.

    Args:
        loan_words: Iterable of candidate words.
        original_words: Iterable of source words, paired positionally with
            ``loan_words`` (``zip`` semantics: extra items in the longer
            iterable are ignored).
        batch_size: Maximum number of pairs per forward pass.

    Returns:
        tuple ``(predicted_labels, logits)`` of NumPy arrays:
        ``predicted_labels`` is the argmax over dim 1 of ``logits``, one entry
        per pair; ``logits`` holds the raw model outputs, one row per pair.
        For empty input both arrays have zero rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    texts = [f"{lw} [SEP] {ow}" for lw, ow in zip(loan_words, original_words)]

    if not texts:
        # The tokenizer cannot encode an empty batch; return correctly-shaped empties.
        empty_logits = torch.empty((0, model.config.num_labels))
        return torch.empty(0, dtype=torch.long).numpy(), empty_logits.numpy()

    logit_chunks = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            encoded_inputs = tokenizer(
                texts[start:start + batch_size],
                padding=True,
                truncation=True,
                max_length=128,
                return_tensors="pt",
            )
            outputs = model(
                input_ids=encoded_inputs["input_ids"].to(device),
                attention_mask=encoded_inputs["attention_mask"].to(device),
            )
            # Move each chunk off the accelerator right away to keep device memory flat.
            logit_chunks.append(outputs.logits.cpu())

    logits = torch.cat(logit_chunks, dim=0)
    predicted_labels = torch.argmax(logits, dim=1).numpy()
    return predicted_labels, logits.numpy()


# Evaluate the model on every language pair and collect one metrics row per pair.
results = []

for lang1, lang2 in language_pairs:
    df = read_language(lang1, lang2)

    if df is None:
        # No test file for this pair: nothing to score, but still print the separator.
        print("-" * 50)
        continue

    y_true = df["label"].values
    y_pred, logits = predict_batch(
        df["loan_word"].tolist(),
        df["original_word"].tolist(),
    )

    accuracy = accuracy_score(y_true, y_pred)
    # Weighted F1: per-class F1 averaged by class support.
    f1 = f1_score(y_true, y_pred, average="weighted")

    print(f"{lang1}-{lang2} | Accuracy: {accuracy:.4f} | F1-score: {f1:.4f}")

    results.append(
        {"Language Pair": f"{lang1}-{lang2}", "Accuracy": accuracy, "F1-score": f1}
    )

    # Pair each input with its prediction and show the first one as a spot check.
    text_value_pairs = list(zip(df["loan_word"], df["original_word"], y_pred))
    print("Sample Predictions:")
    for loan, orig, pred in text_value_pairs[:1]:
        print(f"  - Loan: {loan} | Original: {orig} | Predicted Label: {pred}")

    print("-" * 50)

# Persist the per-pair metrics table.
results_df = pd.DataFrame(results)
results_df.to_csv("bert_results.csv", index=False)
print("\n Results saved to `bert_results.csv`")


Azerbaijani-Arabic | Accuracy: 0.9732 | F1-score: 0.9722
Sample Predictions:
  - Loan: məlumat | Original: مَعْلُومَات | Predicted Label: 0
--------------------------------------------------
Catalan-Arabic | Accuracy: 0.9444 | F1-score: 0.9175
Sample Predictions:
  - Loan: moixama | Original: مُشَمَّع | Predicted Label: 0
--------------------------------------------------
Chinese-English | Accuracy: 0.9421 | F1-score: 0.9329
Sample Predictions:
  - Loan: 逆境 | Original: Catastrophe | Predicted Label: 0
--------------------------------------------------
English-French | Accuracy: 0.9202 | F1-score: 0.9062
Sample Predictions:
  - Loan: gutter rabbit | Original: lapin de gouttière | Predicted Label: 0
--------------------------------------------------
English-German | Accuracy: 0.9116 | F1-score: 0.9116
Sample Predictions:
  - Loan: Stopp | Original: Stopp | Predicted Label: 0
--------------------------------------------------
Finnish-Swedish | Accuracy: 0.9280 | F1-score: 0.9186
Sample Pr

In [19]:
# Display the per-language-pair accuracy / F1 table.
results_df

Unnamed: 0,Language Pair,Accuracy,F1-score
0,Azerbaijani-Arabic,0.973214,0.972161
1,Catalan-Arabic,0.944444,0.91746
2,Chinese-English,0.942105,0.932853
3,English-French,0.920197,0.906185
4,English-German,0.911565,0.911565
5,Finnish-Swedish,0.928,0.918634
6,German-French,0.915441,0.907393
7,German-Italian,0.88,0.88
8,Hindi-Persian,0.915663,0.906113
9,Hungarian-German,0.907407,0.913563
