In [6]:
from sentence_transformers import SentenceTransformer, InputExample, losses, util
from torch.utils.data import DataLoader
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from datasets import load_dataset



In [8]:
# Start from a pre-trained Sentence-BERT checkpoint.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Fetch the "en" configuration of the STS Benchmark dataset.
dataset = load_dataset("PhilipMay/stsb_multi_mt", "en")

# Turn every training row into a sentence pair whose label is the annotated
# similarity score divided by 5.0, i.e. normalized to the range [0, 1].
sts_examples = [
    InputExample(
        texts=[row['sentence1'], row['sentence2']],
        label=float(row['similarity_score']) / 5.0,
    )
    for row in dataset['train']
]

# Serve the examples in shuffled mini-batches of 16 pairs.
train_dataloader = DataLoader(sts_examples, shuffle=True, batch_size=16)

# Loss that compares the cosine similarity of the two sentence embeddings
# with the normalized label.
train_loss = losses.CosineSimilarityLoss(model)

# Fine-tune for a single pass over the training split.
training_objective = (train_dataloader, train_loss)
model.fit(train_objectives=[training_objective], epochs=1)

100%|██████████| 360/360 [00:54<00:00,  6.62it/s]

{'train_runtime': 54.4086, 'train_samples_per_second': 105.663, 'train_steps_per_second': 6.617, 'train_loss': 0.027549976772732205, 'epoch': 1.0}





In [11]:
job_description = "We are looking for a skilled software engineer with experience in Python and machine learning."
articles = [
    "An article about Python programming and its applications in machine learning.",
    "A detailed guide on how to become a machine learning specialist.",
    "An overview of the latest trends in software engineering."
]

# Encode the job description once -- it is identical for every comparison, so
# re-encoding it inside the loop only repeats the same forward pass -- and
# encode all articles together in one batched call.
job_embedding = model.encode(job_description)
article_embeddings = model.encode(articles)

# cos_sim of one vector against a batch yields a (1, len(articles)) matrix;
# row 0 holds one similarity score per article, in the order of `articles`.
similarity_scores = util.cos_sim(job_embedding, article_embeddings)[0]

# Print the similarity score of each article against the job description.
for article, similarity_score in zip(articles, similarity_scores):
    print(f"Similarity between the job description and '{article}': {similarity_score.item():.4f}")

Similarity between the job description and 'An article about Python programming and its applications in machine learning.': 0.6093
Similarity between the job description and 'A detailed guide on how to become a machine learning specialist.': 0.5278
Similarity between the job description and 'An overview of the latest trends in software engineering.': 0.4220
