In [1]:
from datasets import load_dataset
from pprint import pprint
from sentence_transformers import SentenceTransformer
import torch

# Check if a GPU is available
if torch.cuda.is_available():
    print("CUDA is available")

  from .autonotebook import tqdm as notebook_tqdm


CUDA is available


In [8]:
# model_checkpoint = "paraphrase-multilingual-MiniLM-L12-v2"

# model = SentenceTransformer(
#     model_checkpoint,
#     device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
# )

In [3]:
# Checkpoints to evaluate
checkpoints = [
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    "HZeroxium/paraphrase-multilingual-MiniLM-L12-v2-job-cv-multi-dataset",
    "CrazyDave53/OpenCV-finetuned",
]
evaluation_results = {}

In [4]:
binary_dataset = load_dataset("HZeroxium/job-cv-binary")
triplet_dataset = load_dataset("HZeroxium/cv-job-triplet")
similarity_dataset = load_dataset("HZeroxium/cv-job-similarity")
job_paraphrase_dataset = load_dataset("HZeroxium/job-paraphrase")
cv_paraphrase_dataset = load_dataset("HZeroxium/cv-paraphrase")

In [5]:
eval_binary_dataset = binary_dataset["test"]
eval_triplet_dataset = triplet_dataset["test"]
eval_similarity_dataset = similarity_dataset["test"]
eval_job_paraphrase_dataset = job_paraphrase_dataset["test"]
eval_cv_paraphrase_dataset = cv_paraphrase_dataset["test"]

In [6]:
eval_dataset = {
    "binary": eval_binary_dataset,
    "triplet": eval_triplet_dataset,
    "similarity": eval_similarity_dataset,
    "job_paraphrase": eval_job_paraphrase_dataset,
    "cv_paraphrase": eval_cv_paraphrase_dataset,
}

In [7]:
from sentence_transformers.evaluation import (
    BinaryClassificationEvaluator,
    TripletEvaluator,
    EmbeddingSimilarityEvaluator,
)

job_scores = [1] * len(eval_job_paraphrase_dataset["text1"])
cv_scores = [1] * len(eval_cv_paraphrase_dataset["text1"])

binary_evaluator = BinaryClassificationEvaluator(
    sentences1=eval_binary_dataset["text1"],
    sentences2=eval_binary_dataset["text2"],
    labels=eval_binary_dataset["label"],
)

triplet_evaluator = TripletEvaluator(
    anchors=eval_triplet_dataset["anchor"],
    positives=eval_triplet_dataset["positive"],
    negatives=eval_triplet_dataset["negative"],
)

similarity_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=eval_similarity_dataset["text1"],
    sentences2=eval_similarity_dataset["text2"],
    scores=eval_similarity_dataset["score"],
)

job_paraphrase_evaluator = BinaryClassificationEvaluator(
    sentences1=eval_job_paraphrase_dataset["text1"],
    sentences2=eval_job_paraphrase_dataset["text2"],
    labels=job_scores,
)

cv_paraphrase_evaluator = BinaryClassificationEvaluator(
    sentences1=eval_cv_paraphrase_dataset["text1"],
    sentences2=eval_cv_paraphrase_dataset["text2"],
    labels=cv_scores,
)

In [8]:
from sentence_transformers.evaluation import SequentialEvaluator

evaluator = SequentialEvaluator(
    [
        binary_evaluator,
        triplet_evaluator,
        similarity_evaluator,
        job_paraphrase_evaluator,
        cv_paraphrase_evaluator,
    ]
)

In [9]:
#
for checkpoint in checkpoints:
    print(f"Evaluating checkpoint: {checkpoint}")

    # Load model
    model = SentenceTransformer(
        checkpoint,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    )

    evaluation_score = evaluator(model=model)
    evaluation_results[checkpoint] = evaluation_score

    print(f"Checkpoint {checkpoint} - Score: {evaluation_score}")

Evaluating checkpoint: sentence-transformers/all-mpnet-base-v2
Checkpoint sentence-transformers/all-mpnet-base-v2 - Score: {'cosine_accuracy': np.float64(0.9936305732484076), 'cosine_accuracy_threshold': np.float32(0.047278613), 'cosine_f1': np.float64(0.9968051118210862), 'cosine_f1_threshold': np.float32(0.047278613), 'cosine_precision': 1.0, 'cosine_recall': np.float64(0.9936305732484076), 'cosine_ap': np.float64(1.0), 'pearson_cosine': np.float64(0.23218382248628994), 'spearman_cosine': np.float64(0.2407817508398101), 'sequential_score': np.float64(1.0)}
Evaluating checkpoint: sentence-transformers/all-MiniLM-L6-v2
Checkpoint sentence-transformers/all-MiniLM-L6-v2 - Score: {'cosine_accuracy': np.float64(0.9936305732484076), 'cosine_accuracy_threshold': np.float32(0.07158986), 'cosine_f1': np.float64(0.9968051118210862), 'cosine_f1_threshold': np.float32(0.07158986), 'cosine_precision': 1.0, 'cosine_recall': np.float64(0.9936305732484076), 'cosine_ap': np.float64(1.0), 'pearson_cosi

In [12]:
print("\nEvaluation Results:")
for checkpoint, score in evaluation_results.items():
    pprint(f"{checkpoint}: {score}")


Evaluation Results:
("sentence-transformers/all-mpnet-base-v2: {'cosine_accuracy': "
 "np.float64(0.9936305732484076), 'cosine_accuracy_threshold': "
 "np.float32(0.047278613), 'cosine_f1': np.float64(0.9968051118210862), "
 "'cosine_f1_threshold': np.float32(0.047278613), 'cosine_precision': 1.0, "
 "'cosine_recall': np.float64(0.9936305732484076), 'cosine_ap': "
 "np.float64(1.0), 'pearson_cosine': np.float64(0.23218382248628994), "
 "'spearman_cosine': np.float64(0.2407817508398101), 'sequential_score': "
 'np.float64(1.0)}')
("sentence-transformers/all-MiniLM-L6-v2: {'cosine_accuracy': "
 "np.float64(0.9936305732484076), 'cosine_accuracy_threshold': "
 "np.float32(0.07158986), 'cosine_f1': np.float64(0.9968051118210862), "
 "'cosine_f1_threshold': np.float32(0.07158986), 'cosine_precision': 1.0, "
 "'cosine_recall': np.float64(0.9936305732484076), 'cosine_ap': "
 "np.float64(1.0), 'pearson_cosine': np.float64(0.36789476340431904), "
 "'spearman_cosine': np.float64(0.39007503943068

In [15]:
import pandas as pd
import numpy as np

# Chuyển đổi thành DataFrame
df = pd.DataFrame(evaluation_results).T

# Rút gọn các giá trị số xuống 3 chữ số thập phân
df = df.map(lambda x: round(x, 3) if isinstance(x, (float, np.floating)) else x)


# Ghi ra file CSV nếu cần
df.to_csv("evaluation_results.csv", index=True)