In [1]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Transformer, Pooling
from transformers import AutoTokenizer
from typing import Union, List, Dict, Tuple

# Hugging Face checkpoint that is wrapped as a sentence encoder below.
model_path = "mymusise/gpt2-medium-chinese"

# Build the sentence encoder directly from the checkpoint name.
model = SentenceTransformer(model_path)

# Some tokenizers have no usable CLS token: either none is defined at all
# (cls_token_id is None) or its id lies outside the base vocabulary.
# In both cases reuse the pad token as CLS. The explicit None check avoids a
# TypeError from comparing None with an int.
cls_token_id = model.tokenizer.cls_token_id
if cls_token_id is None or cls_token_id >= model.tokenizer.vocab_size:
    model.tokenizer.cls_token = model.tokenizer.pad_token

No sentence-transformers model found with name /home/mymusise/.cache/torch/sentence_transformers/mymusise_gpt2-medium-chinese. Creating a new one with MEAN pooling.
Some weights of the model checkpoint at /home/mymusise/.cache/torch/sentence_transformers/mymusise_gpt2-medium-chinese were not used when initializing GPT2Model: ['score.weight']
- This IS expected if you are initializing GPT2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
from datasets import load_dataset

# Load the "afqmc" configuration of the "clue" dataset; later cells index the
# result by split name ('train', 'validation').
dataset = load_dataset(path="clue", name="afqmc")

Found cached dataset clue (/home/mymusise/.cache/huggingface/datasets/clue/afqmc/1.0.0/e508b66266ba417d60e89ed8b167699cb4b56d3a2ead29b5667907d08069dbfc)
100%|██████████| 3/3 [00:00<00:00, 1321.32it/s]


In [4]:
from sentence_transformers import SentenceTransformer, SentencesDataset, InputExample, losses
from torch.utils.data import DataLoader

# Turn every training row into an InputExample holding the sentence pair and
# its label cast to float.
train_examples = [
    InputExample(
        texts=[row['sentence1'], row['sentence2']],
        label=float(row['label']),
    )
    for row in dataset['train']
]

# NOTE: despite its name, `train_dataset` is a DataLoader (shuffled batches of 32).
train_dataset = DataLoader(train_examples, shuffle=True, batch_size=32)

In [5]:
# Training objective: cosine-similarity loss bound to the model being tuned.
train_loss = losses.CosineSimilarityLoss(model=model)

# Fine-tune on the single (dataloader, loss) objective for 3 epochs,
# with 100 warmup steps.
model.fit(
    train_objectives=[(train_dataset, train_loss)],
    epochs=3,
    warmup_steps=100,
)

Iteration: 100%|██████████| 1073/1073 [04:58<00:00,  3.59it/s]
Iteration: 100%|██████████| 1073/1073 [04:59<00:00,  3.58it/s]
Iteration: 100%|██████████| 1073/1073 [04:49<00:00,  3.70it/s]
Epoch: 100%|██████████| 3/3 [14:48<00:00, 296.08s/it]


In [6]:
from sklearn.metrics import precision_recall_fscore_support, precision_score, f1_score
from scipy.spatial.distance import cosine
from tqdm import tqdm

# Evaluate the fine-tuned encoder on the validation split: a sentence pair is
# predicted positive (1) when the cosine similarity of its two embeddings
# exceeds SIM_THRESHOLD, otherwise negative (0).
SIM_THRESHOLD = 0.5

test_set = dataset['validation']

# Encode both sides of every pair in bulk.
vec1s = model.encode(test_set['sentence1'])
vec2s = model.encode(test_set['sentence2'])
label = test_set['label']

# Gold labels, materialized as a plain list for scikit-learn.
eva_y = list(label)

# scipy's `cosine` returns a distance, so similarity = 1 - distance.
# `total` lets tqdm draw a real progress bar (zip has no length).
eva_y_pred = [
    1 if (1 - cosine(v1, v2)) > SIM_THRESHOLD else 0
    for v1, v2 in tqdm(zip(vec1s, vec2s), total=len(eva_y))
]

precision = precision_score(eva_y, eva_y_pred)
f1 = f1_score(eva_y, eva_y_pred)
print(f"{precision=:.03}, {f1=:.03}")

4316it [00:00, 28798.77it/s]

precision=0.49, f1=0.47



