In [2]:
from components import *
import argparse
import yaml
import torch
import os
import re
import time
from transformers import AutoTokenizer
# from tqdm import tqdm
# from torch.nn.parallel import DistributedDataParallel, DataParallel
from torch.utils.data import ConcatDataset
from tqdm import tqdm

In [2]:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

In [3]:
config_path = './config/bert-multilingual-regressor.yml'
with open(config_path, 'r') as f:
    opt = yaml.safe_load(f)
    print(yaml.dump(opt, default_flow_style=False, indent=4, explicit_start=True, explicit_end=True, sort_keys=False))
    f.close()

---
seed: 42
datasets:
    ViSTS:
        data_path: ./data/no_segment/ViSTS.json
        train_path: ./data/train_test_split/ViSTS-no-segment-train.json
        test_path: ./data/train_test_split/ViSTS-no-segment-test.json
        data_module: ViSTSRegressionDataset
        test_size: 0.3
        reverse_input: true
        force_remake: false
tokenizer: amberoad/bert-multilingual-passage-reranking-msmarco
hf_cache: ../hf_cache
max_length: 512
pretrained_path: ./model_checkpoints/bertregressor-finetune/epoch_11.pt
load_state_dict_option: force_load
model:
    model_type: Bert
    hidden_size: 768
    num_hidden_layers: 12
    num_attention_heads: 12
    intermediate_size: 3072
    hidden_act: gelu
    hidden_dropout_prob: 0.1
    attention_probs_dropout_prob: 0.1
    max_position_embeddings: 512
    position_embedding_type: absolute
    torch_dtype: float32
    type_vocab_size: 2
    layer_norm_eps: 1.0e-12
    initializer_range: 0.02
    pooler_fc_size: 768
    pooler_num_attention_h

In [4]:
opt['tokenizer'] = AutoTokenizer.from_pretrained(opt['tokenizer'], cache_dir=opt['hf_cache'])

In [5]:
total_train_set, total_test_set = [], []
for k, v in opt['datasets'].items():
    print(f'Make {k} dataset!')
    train_set, test_set = get_dataset(**v, **opt)
    total_train_set.append(train_set)
    total_test_set.append(test_set)
total_train_set = ConcatDataset(total_train_set)
total_test_set = ConcatDataset(total_test_set)
print('Num of training samples:', len(total_train_set))
print('Num of testing samples:', len(total_test_set))
train_loader = get_dataloader(total_train_set, 'train', **opt)
test_loader = get_dataloader(total_test_set, 'test', **opt)

Make ViSTS dataset!
Num of training samples: 33390
Num of testing samples: 14310


In [6]:
batch = next(iter(train_loader))
for k, v in batch.items():
    print(k, v.shape)

input_ids torch.Size([8, 512])
attention_mask torch.Size([8, 512])
token_type_ids torch.Size([8, 512])
labels torch.Size([8])


In [7]:
batch

{'input_ids': tensor([[  101, 11104, 19538,  ...,     0,     0,     0],
         [  101, 10381, 24274,  ...,     0,     0,     0],
         [  101, 11808, 10348,  ...,     0,     0,     0],
         ...,
         [  101, 10381, 10173,  ...,     0,     0,     0],
         [  101, 10381, 11107,  ...,     0,     0,     0],
         [  101, 13468, 10972,  ...,     0,     0,     0]]),
 'attention_mask': tensor([[1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         ...,
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.]]),
 'token_type_ids': tensor([[0, 0, 0,  ..., 1, 1, 1],
         [0, 0, 0,  ..., 1, 1, 1],
         [0, 0, 0,  ..., 1, 1, 1],
         ...,
         [0, 0, 0,  ..., 1, 1, 1],
         [0, 0, 0,  ..., 1, 1, 1],
         [0, 0, 0,  ..., 1, 1, 1]]),
 'labels': tensor([0.2300, 0.0800, 0.5200, 0.6400, 0.4800, 0.8000, 0.5200, 0.6200])}

In [8]:
torch.any(batch['input_ids'] >= opt['tokenizer'].vocab_size)

tensor(False)

In [14]:
device = torch.device(opt['device'])
model = torch.load('./model_checkpoints/bertclassifier-finetune/epoch_12.pt', map_location=device)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [16]:
loss_fct = torch.nn.MSELoss()
losses = []
pbar = tqdm(test_loader)
for batch in pbar:
    batch = {k:v.to(device) for k,v in batch.items()}
    labels = batch.pop('labels')
    with torch.no_grad():
        outputs = model(**batch)
        logits = torch.softmax(outputs.logits, dim=-1).max(dim=-1).values
        loss = loss_fct(logits, labels)
        losses.append(torch.pow(loss, 1/2).item())
        pbar.set_postfix({'loss':losses[-1]})
        


100%|██████████| 1789/1789 [10:56<00:00,  2.73it/s, loss=0.536]


In [17]:
print('Avg_loss =', torch.tensor(losses).mean().item())

Avg_loss = 0.4262198805809021


In [10]:
model = load_backbone(**opt)
# print(model)
pytorch_total_params = sum(p.numel() for p in model.parameters())
print('Total parameters:', pytorch_total_params)

Force load model object from file!
Total parameters: 167357185


In [11]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [12]:
model.cpu()
batch = {
    "input_ids": torch.randint(0, 105879, (2, 512)).long(),
    "attention_mask": torch.tensor([[1] * 300 + [0] * 212, [1] * 200 + [0] * 312], dtype=torch.float),
    "token_type_ids": torch.tensor([[0] * 62 + [1] * 450, [0] * 100 + [1] * 412], dtype=torch.long),
    "labels": torch.tensor([1, 0])
}

outputs = model(**batch)

---

## Make regressor model

In [40]:
from copy import deepcopy
from collections import OrderedDict
from transformers import BertForSequenceClassification, BertConfig
import torch

In [41]:
clf_model = torch.load('./model_checkpoints/bertclassifier-finetune/epoch_12.pt')

In [42]:
clf_model.config.__dict__

{'return_dict': True,
 'output_hidden_states': False,
 'output_attentions': False,
 'torchscript': False,
 'torch_dtype': None,
 'use_bfloat16': False,
 'tf_legacy_loss': False,
 'pruned_heads': {},
 'tie_word_embeddings': True,
 'is_encoder_decoder': False,
 'is_decoder': False,
 'cross_attention_hidden_size': None,
 'add_cross_attention': False,
 'tie_encoder_decoder': False,
 'max_length': 20,
 'min_length': 0,
 'do_sample': False,
 'early_stopping': False,
 'num_beams': 1,
 'num_beam_groups': 1,
 'diversity_penalty': 0.0,
 'temperature': 1.0,
 'top_k': 50,
 'top_p': 1.0,
 'typical_p': 1.0,
 'repetition_penalty': 1.0,
 'length_penalty': 1.0,
 'no_repeat_ngram_size': 0,
 'encoder_no_repeat_ngram_size': 0,
 'bad_words_ids': None,
 'num_return_sequences': 1,
 'chunk_size_feed_forward': 0,
 'output_scores': False,
 'return_dict_in_generate': False,
 'forced_bos_token_id': None,
 'forced_eos_token_id': None,
 'remove_invalid_values': False,
 'exponential_decay_length_penalty': None,
 'su

In [43]:
reg_config = deepcopy(clf_model.config.__dict__)
reg_config['_name_or_path'] = None
reg_config['num_labels'] = 1
reg_config['id2label'] = None
reg_config['label2id'] = None
reg_config['problem_type'] = 'regression'
reg_config['tokenizer_class'] = 'BertTokenizer'
reg_config = BertConfig(**reg_config)
reg_config

BertConfig {
  "_name_or_path": null,
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "problem_type": "regression",
  "tokenizer_class": "BertTokenizer",
  "transformers_version": "4.36.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 105879
}

In [46]:

regressor = BertForSequenceClassification(reg_config)

regressor

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [47]:
regressor.bert = clf_model.bert

regressor.classifier = torch.nn.Sequential(
                            *[
                                torch.nn.Linear(in_features=768, out_features=1, bias=True),
                                torch.nn.Sigmoid()
                            ]
                        )
clf_weights = OrderedDict()
for k,v in clf_model.classifier.state_dict().items():
    print(k, v.shape)
    clf_weights['0.'+k] = torch.chunk(v, 2, dim=0)[-1]
    
regressor.classifier.load_state_dict(clf_weights)

weight torch.Size([2, 768])
bias torch.Size([2])


<All keys matched successfully>

In [49]:
regressor

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [48]:
torch.save(regressor, './model_checkpoints/bertregressor-finetune-base/epoch_12.pt')

In [50]:
from transformers import RobertaForSequenceClassification, RobertaConfig, AutoModel

# tokenizer = AutoTokenizer.from_pretrained('vinai/phobert-base-v2', cache_dir='../hf_cache')
pho_model = AutoModel.from_pretrained('vinai/phobert-base-v2', cache_dir='../hf_cache')

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/phobert-base-v2 and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [51]:
config = {
    "_name_or_path": 'vinai/phobert-base-v2',
    "architectures": [
        "RobertForSequenceClassification"
    ],
    "attention_probs_dropout_prob": 0.1,
    "classifier_dropout": None,
    "directionality": "bidi",
    "gradient_checkpointing": False,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
        "0": "NEG",
        "1": "POS"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "label2id": {
        "NEG": 0,
        "POS": 1
    },
    "layer_norm_eps": 1e-5,
    "max_position_embeddings": 258,
    "model_type": "roberta",
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "pad_token_id": 1,
    "pooler_fc_size": 768,
    "pooler_num_attention_heads": 12,
    "pooler_num_fc_layers": 3,
    "pooler_size_per_head": 128,
    "pooler_type": "first_token_transform",
    "position_embedding_type": "absolute",
    "tokenizer_class": "PhobertTokenizer",
    "torch_dtype": "float32",
    "problem_type": "single_label_classification",
    "transformers_version": "4.36.2",
    "type_vocab_size": 1,
    "use_cache": True,
    "vocab_size": 64001
}
# config.update(pho_clf.config.__dict__)
config = RobertaConfig(**config)
config

RobertaConfig {
  "_name_or_path": "vinai/phobert-base-v2",
  "architectures": [
    "RobertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "directionality": "bidi",
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "NEG",
    "1": "POS"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NEG": 0,
    "POS": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 258,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "tokenizer_class": "PhobertTokenizer",
  "torch_

In [52]:
pho_clf = RobertaForSequenceClassification(config)

In [53]:
pho_clf

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(258, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerN

In [54]:
pho_clf.roberta = pho_model

In [55]:
torch.save(pho_clf, "./model_checkpoints/phobert-4Quy/epoch_0.pt")

## Train test

In [20]:
if opt['train_from_last_epoch'] and os.path.exists(opt['model_checkpoints']):
    # try:
        files = os.listdir(opt['model_checkpoints'])
        files = sorted(files, key= lambda x : int(re.search(r'\d+', x).group(0)), reverse=True)
        last_ckpt = os.path.join(opt['model_checkpoints'], files[0])
        print(last_ckpt)
        last_ep = int(files[0].lstrip('epoch_').rstrip('.pt'))
        opt['start_epoch'] = last_ep + 1
        model.load_state_dict(torch.load(last_ckpt).state_dict())
        print('Continue training from epoch', opt['start_epoch'])
    # except Exception as e:
    #     print('Error loading last model checkpoint:', e.args)
    #     print('The training process is still going on!')

./model_checkpoints/xlmclassifier/epoch_4.pt
Continue training from epoch 5


In [None]:
device = torch.device(opt['device'])
model.to(device)

optimizer = getattr(torch.optim, opt['optimizer'])(model.parameters(), lr=opt['lr'])

In [None]:
# batch['position_ids'] = torch.arange(0, opt['model']['max_position_embeddings'], dtype=torch.long).expand(opt['batch_size'], -1)
batch = {k:v.to(device) for k,v in batch.items()}

optimizer.zero_grad()
outputs = model(**batch)

loss = outputs.loss
print(loss.item())
loss.backward()
optimizer.step()
batch = {k:v.cpu() for k,v in batch.items()}

0.5213193297386169


---

In [None]:
import torch
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

In [None]:
start_logits = torch.softmax(torch.randn((100, 2)), dim=-1)
start_labels = torch.randint(0, 2, (100, 1))

In [None]:
print(start_logits)
print(start_labels)

tensor([[0.0504, 0.9496],
        [0.6420, 0.3580],
        [0.4658, 0.5342],
        [0.1141, 0.8859],
        [0.7643, 0.2357],
        [0.2941, 0.7059],
        [0.8811, 0.1189],
        [0.2666, 0.7334],
        [0.3587, 0.6413],
        [0.6410, 0.3590],
        [0.5825, 0.4175],
        [0.4967, 0.5033],
        [0.6839, 0.3161],
        [0.6982, 0.3018],
        [0.8474, 0.1526],
        [0.6509, 0.3491],
        [0.5729, 0.4271],
        [0.6218, 0.3782],
        [0.8771, 0.1229],
        [0.6916, 0.3084],
        [0.2046, 0.7954],
        [0.1067, 0.8933],
        [0.9328, 0.0672],
        [0.1703, 0.8297],
        [0.6900, 0.3100],
        [0.5119, 0.4881],
        [0.8172, 0.1828],
        [0.5643, 0.4357],
        [0.1998, 0.8002],
        [0.1537, 0.8463],
        [0.0659, 0.9341],
        [0.6595, 0.3405],
        [0.4290, 0.5710],
        [0.4847, 0.5153],
        [0.3896, 0.6104],
        [0.4635, 0.5365],
        [0.6442, 0.3558],
        [0.2426, 0.7574],
        [0.3

In [None]:
b_logits = iter(list(torch.chunk(start_logits, chunks=10, dim=0)))
b_labels = iter(list(torch.chunk(start_labels, chunks=10, dim=0)))

In [None]:
next(b_logits).shape

torch.Size([10, 2])

In [None]:
t_loss, tp, tn, fp, fn = 0., 0, 0, 0, 0

for logits_, labels_ in tqdm(zip(b_logits, b_labels)):
    logits = torch.argmax(logits_, dim=-1).flatten()
    labels = labels_.flatten()
    print(logits)
    print(labels)
    tp += torch.sum(((logits == 1) & (labels == 1))).item()
    tn += torch.sum(((logits == 0) & (labels == 0))).item()
    fp += torch.sum(((logits == 1) & (labels == 0))).item()
    fn += torch.sum(((logits == 0) & (labels == 1))).item()

acc = (tp + tn) / (tp + tn + fp + fn)
pre = (tp + 1e-8) / (tp + fp + 1e-8)
rec = (tp + 1e-8) / (tp + fn + 1e-8)
f1 = (2 * pre * rec) / (pre + rec + 1e-8) 

print(acc, f1)

10it [00:00, 1425.18it/s]

tensor([1, 0, 1, 1, 0, 1, 0, 1, 1, 0])
tensor([0, 0, 0, 1, 1, 1, 1, 1, 0, 1])
tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 1, 0, 0, 1, 0, 0, 1, 0])
tensor([1, 1, 0, 1, 0, 0, 0, 0, 1, 1])
tensor([1, 0, 0, 1, 1, 1, 1, 0, 1, 0])
tensor([1, 0, 1, 1, 1, 1, 0, 1, 1, 1])
tensor([0, 0, 0, 1, 1, 1, 0, 1, 0, 0])
tensor([1, 0, 1, 0, 0, 0, 1, 1, 1, 0])
tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])
tensor([0, 0, 0, 1, 0, 1, 1, 0, 1, 0])
tensor([1, 0, 0, 1, 0, 0, 1, 1, 1, 0])
tensor([1, 0, 0, 1, 1, 0, 0, 1, 1, 0])
tensor([0, 0, 0, 1, 0, 1, 0, 0, 1, 0])
tensor([1, 1, 0, 0, 0, 0, 1, 0, 1, 0])
tensor([0, 0, 1, 1, 0, 0, 1, 0, 0, 1])
tensor([0, 1, 1, 1, 1, 1, 0, 0, 1, 0])
tensor([1, 0, 0, 0, 0, 1, 1, 1, 0, 0])
tensor([1, 0, 1, 0, 0, 1, 1, 0, 0, 1])
tensor([1, 1, 1, 0, 1, 1, 0, 1, 1, 1])
0.53 0.515463912626209





In [None]:
print(accuracy_score(start_labels.numpy(), torch.argmax(start_logits, dim=-1).numpy()))
print(f1_score(start_labels.numpy(), torch.argmax(start_logits, dim=-1).numpy()))

0.53
0.5154639175257733


In [None]:
from tqdm import tqdm
f = open('./training_logs.txt', 'w')
for i in tqdm(range(100), file=f):
    print(i)

---

In [None]:
from datasets import load_dataset

dataset = load_dataset('anti-ai/ViNLI-SimCSE-supervised', cache_dir='./data')

Downloading readme: 100%|██████████| 110/110 [00:00<00:00, 183kB/s]
Downloading data: 100%|██████████| 45.5M/45.5M [00:03<00:00, 12.8MB/s]
Generating train split: 127737 examples [00:00, 309117.56 examples/s]


In [None]:
dataset['train'].to_json('./data/ViNLI-SimCSE-supervised.json')

Creating json from Arrow format: 100%|██████████| 128/128 [00:02<00:00, 52.41ba/s]


68884379

## ViNLI-Zalo, ViNLI-SimCSE

In [None]:
import json
data_path = '/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/ViNLI-Zalo-supervised.json'

raw_data = open(data_path, 'r').readlines() if data_path else []
data = []
for item in raw_data:
    item_ = json.loads(item)
    question = item_['anchor']
    context_p = item_['pos']
    context_n = item_['hard_neg']
    data.append(
        {
            "query": question,
            "context": context_p,
            "label": 1,
        }
    )
    data.append(
        {
            "query": question,
            "context": context_n,
            "label": 0,
        }
    )

In [None]:
import csv

In [None]:
dest_path = '/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/csv/ViNLI-Zalo-supervised.csv'

def write_to_csv(data, dest_path, fieldnames=['query', 'context', 'label']):
    with open(dest_path, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        # Viết tiêu đề (tên cột)
        writer.writeheader()

        # Viết dữ liệu từ danh sách các từ điển
        for row in data:
            writer.writerow(row)
        file.close()


## ViMMRC, IR

In [None]:
data_path = ['/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/ViMMRC_segment.json',
             '/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/train_IR_segment.json']

dest_path = ['/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/csv/ViMMRC_segment.csv',
             '/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/csv/train_IR_segment.csv']

for path, d_path in zip(data_path, dest_path):
    raw_data = json.load(open(path, 'r'))
    data = []
    for item in raw_data:
        question = item['question']
        context = item['context']
        label = item['labels']
        data.append(
            {
                "query": question,
                "context": context,
                "label": label
            }
        )
    write_to_csv(data, d_path)
    

## ViSTS

In [None]:
data_path = '/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/ViSTS-segment.json'
dest_path = '/workspace/nlplab/kienvt/KLTN/proc1-extract_storage/cross-encoder/data/segment/csv/ViSTS-segment.csv'

In [None]:
raw_data = open(data_path, 'r').readlines() if data_path else []
data = []
for item in raw_data:
    item_ = json.loads(item)
    sent1 = item_['sentence1']
    sent2 = item_['sentence2']
    score = item_['score']
    new_item = {
        "query": sent1,
        "context": sent2, 
        "label": round(score / 5, 2),
    }
    data.append(new_item)
    
write_to_csv(data, dest_path)

---

In [None]:
from typing import Any
from transformers import AutoModelForMaskedLM

roberta = AutoModelForMaskedLM.from_pretrained("roberta-large")

def visualize_children(
    object : Any,
    level : int = 0,
) -> None:
    """
    Prints the children of (object) and their children too, if there are any.
    Uses the current depth (level) to print things in a ordonnate manner.
    """
    print(f"{'   ' * level}{level}- {type(object).__name__}")
    try:
        for child in object.children():
            visualize_children(child, level + 1)
    except:
        pass

visualize_children(roberta)

---

In [15]:
from transformers import pipeline
import os
import torch
os.environ['TRANSFORMERS_CACHE'] = '../hf_cache'
# Replace this with your own checkpoint
model_checkpoint = "chieunq/XLM-R-base-finetuned-uit-vquad-1"
question_answerer = pipeline("question-answering", model=model_checkpoint)

context = """
Nhóm của chúng tôi là sinh viên năm 4 trường ĐH Công Nghệ - ĐHQG Hà Nội. Nhóm gồm 3 thành viên : Nguyễn Quang Chiều, Nguyễn Quang Huy và Nguyễn Trần Anh Đức . Đây là pha Reader trong dự án cuồi kì môn Các vấn đề hiện đại trong CNTT của nhóm . 
"""
question = "3 thành viên trong nhóm gồm những ai ?"
question_answerer(question=question, context=context)


{'score': 0.9925937652587891,
 'start': 98,
 'end': 157,
 'answer': 'Nguyễn Quang Chiều, Nguyễn Quang Huy và Nguyễn Trần Anh Đức'}

In [25]:
example = question_answerer.create_sample(question=question, context=context)
for i in question_answerer.preprocess(example):
    print(i)

{'example': <transformers.data.processors.squad.SquadExample object at 0x7f3323f35370>, 'is_last': True, 'input_ids': tensor([[     0,    138,   2781,   4603,   1000,  42106,  33256,   1358,   1300,
            705,      2,      2, 233934,    550,   4006,   2259,    580,   3811,
           4603,   2933,    201,   4373, 164241,   8215,  96845,     20,   4428,
          95795,    724,   8548,   9435,      5, 233934,  33256,    138,   2781,
           4603,    152,  19098,  78542, 220222,      4,  19098,  78542,  88238,
            544,  19098,  42974,   9735,  29030,      6,      5,  37540,    580,
          40681, 122636,   1000,   9609,   7742,    314,  78379,  84416,  37496,
           9211,   8529,   6248,   2812,   7899,   1000,    313, 188522,    550,
          42106,      6,      5,      2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1

In [38]:
a = dict(question_answerer.tokenizer(text=question, 
                            text_pair=context, 
                            max_length=514, 
                            padding=False, 
                            truncation='only_second', 
                            return_token_type_ids=True,
                            return_attention_mask=True))
a.update({
            "labels": 1
        })
a.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'labels'])

In [None]:
question_answerer.tokenizer(text)

In [34]:
question_answerer.tokenizer.model_input_names

['input_ids', 'attention_mask']

In [None]:
from transformers import AutoModelForSequenceClassification
model_checkpoint = "chieunq/XLM-R-base-finetuned-uit-vquad-1"
classifier = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2, classifier_dropout=0.1, problem_type='single_label_classification')

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at chieunq/XLM-R-base-finetuned-uit-vquad-1 and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
for p in classifier.parameters():
    print(p)

Parameter containing:
tensor([[ 0.1623,  0.1492,  0.1608,  ...,  0.1344,  0.1871,  0.0664],
        [-0.0073,  0.0048, -0.0078,  ...,  0.0078,  0.0041, -0.0078],
        [ 0.2037,  0.2546,  0.1320,  ...,  0.1738,  0.0238,  0.2510],
        ...,
        [ 0.3815, -0.4433,  0.1388,  ...,  0.2266,  0.0540,  0.1032],
        [ 0.0205, -0.1243,  0.0191,  ..., -0.0143,  0.0376, -0.1144],
        [ 0.1008,  0.0587,  0.0551,  ...,  0.1231, -0.0064,  0.1216]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0578, -0.0071, -0.0068,  ...,  0.0061, -0.0260, -0.0291],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.1569, -0.0743, -0.2471,  ..., -0.0783, -0.3075, -0.0098],
        ...,
        [ 0.0118,  0.0458, -0.0054,  ..., -0.0864,  0.0374,  0.0040],
        [ 0.0525, -0.0270, -0.0141,  ..., -0.0552,  0.0349,  0.0274],
        [-0.0479, -0.0293,  0.1079,  ..., -0.0824,  0.2906,  0.0861]],
       requires_grad=True)
Parameter containing:
tensor([[-6.

In [None]:
import torch
torch.save(classifier, './model_checkpoints/xlmclassifier/epoch-0.pt')

In [None]:
del classifier

In [None]:
regressor = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1, classifier_dropout=0.1, problem_type='regression')

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at chieunq/XLM-R-base-finetuned-uit-vquad-1 and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
torch.save(regressor, './model_checkpoints/xlmregressor/epoch-0.pt')
del regressor

In [None]:
for p in regressor.parameters():
    print(p)

In [None]:
torch.save(question_answerer.model.roberta, './model_checkpoints/roberta/chieunq-XLM-R-base-finetuned-uit-vquad-1.pt')

In [None]:
import json
json.dump(question_answerer.tokenizer.vocab, open('./chieunq-XLM-R-base-finetuned-uit-vquad-1.json', 'w', encoding='utf-8'), ensure_ascii=False)

In [None]:
tokenizer = question_answerer.tokenizer

tokenizer('Tôi là sinh viên Đại học')

{'input_ids': [0, 14343, 580, 3811, 4603, 18832, 2546, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}

===

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-base")  
model = AutoModelForSeq2SeqLM.from_pretrained("VietAI/vit5-base")

In [None]:
model.config

T5Config {
  "_name_or_path": "VietAI/vit5-base",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
  "use_cache": true,
  "vocab_size": 36096
}

In [None]:
model

T5ForConditionalGeneration(
  (shared): Embedding(36096, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(36096, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [None]:
from transformers import XLMRobertaForSequenceClassification, XLMRobertaConfig


model = XLMRobertaForSequenceClassification.from_pretrained('chieunq/xlm-r-base-uit-viquad')

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at chieunq/xlm-r-base-uit-viquad and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
model

XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0): XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=Tr

In [12]:
import torch
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained("amberoad/bert-multilingual-passage-reranking-msmarco", cache_dir="../hf_cache")
tokenizer = AutoTokenizer.from_pretrained("amberoad/bert-multilingual-passage-reranking-msmarco", cache_dir="../hf_cache")

sentence = 'Vượt đèn đỏ bị phạt bao nhiêu tiền?'  

input_ids = torch.tensor([tokenizer.encode(sentence)])

with torch.no_grad():
    features = model(input_ids)  # Models outputs are now tuples


In [1]:
from sentence_transformers import CrossEncoder

ce_model = CrossEncoder("amberoad/bert-multilingual-passage-reranking-msmarco", num_labels=2, device='cuda')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
ce_model.model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [6]:
ce_model.model.device

device(type='cpu')

In [8]:
query = 'Quang Hải giành được chức vô địch U21 quốc gia năm bao nhiêu tuổi?'

documents = [
    'Năm 2011 , Nguyễn Quang Hải mới 14 tuổi được gọi lên đội tuyển U-16 Việt Nam , thi đấu với những cầu thủ lớn tuổi hơn tại vòng loại giải vô địch bóng đá U-16 châu Á .',
    'Năm 2013 , Nguyễn Quang Hải giành chức vô địch U21 quốc gia 2013 cùng với đội trẻ Hà Nội T&T và tạo nên cú sốc khi trở thành cầu thủ 16 tuổi đầu tiên giành được danh hiệu vô địch U21 quốc gia .',
    'Sau chức vô địch U-21 quốc gia 2013 , Nguyễn Quang Hải mới 16 tuổi lập tức được HLV Phan Thanh Hùng điền vào danh sách của đội bóng thủ đô tham dự V-League 2014 .',
    'Anh bắt đầu gia nhập lò đào tạo trẻ Hà Nội T&T khi mới 9 tuổi vào năm 2006 .',
    'Một trận thi đấu bóng đá thông thường diễn ra trong hai hiệp chính thức liên tiếp , mỗi hiệp gồm 45 phút ngăn cách bằng 15 phút nghỉ giữa giờ . Sau khi hiệp 1 , hai đội bóng sẽ phải đổi sân cho nhau để có sự công bằng trong vòng 1 phút .'
]
pairs = [[query, doc] for doc in documents]

# tokenized = ce_model.tokenizer(text=query, text_pair=documents[-1], padding=False, truncation="only_second", return_tensors="pt", max_length=512)

In [10]:
outputs = ce_model.predict(pairs, show_progress_bar=True, apply_softmax=True, convert_to_numpy=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Batches: 100%|██████████| 1/1 [00:01<00:00,  1.98s/it]


In [13]:
import numpy as np

In [15]:
outputs[:, -1].tolist()

[0.971642792224884,
 0.9995723366737366,
 0.9870374202728271,
 0.00034598022466525435,
 3.3993848774116486e-05]

: 

In [61]:
tokenized

{'input_ids': tensor([[  101, 20624, 11938, 39587, 10554, 16161, 11821, 25901, 78031, 12374,
         11592, 10423, 12658, 13699, 22263,   136,   102, 10381, 13802, 12059,
         11104, 17882, 11135, 13315, 13273, 13297, 11742, 10492, 11938, 30700,
         12365, 11663, 14415, 16879,   117, 12277, 30700, 16444, 10931, 64426,
         10703, 15471, 12221, 10217, 64426, 20266, 20608, 22935,   119, 11356,
         11839, 30700,   122,   117, 11938, 11705, 17882, 10128, 14692, 11705,
         10418, 11268, 21136, 11654, 10348, 10192, 11638, 12221, 10492, 21780,
           122, 64426,   119,   102]], device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [40]:
print(yaml.dump(ce_model.model.config, sort_keys=False))

!!python/object:transformers.models.bert.configuration_bert.BertConfig
return_dict: true
output_hidden_states: false
output_attentions: false
torchscript: false
torch_dtype: null
use_bfloat16: false
tf_legacy_loss: false
pruned_heads: {}
tie_word_embeddings: true
is_encoder_decoder: false
is_decoder: false
cross_attention_hidden_size: null
add_cross_attention: false
tie_encoder_decoder: false
max_length: 20
min_length: 0
do_sample: false
early_stopping: false
num_beams: 1
num_beam_groups: 1
diversity_penalty: 0.0
temperature: 1.0
top_k: 50
top_p: 1.0
typical_p: 1.0
repetition_penalty: 1.0
length_penalty: 1.0
no_repeat_ngram_size: 0
encoder_no_repeat_ngram_size: 0
bad_words_ids: null
num_return_sequences: 1
chunk_size_feed_forward: 0
output_scores: false
return_dict_in_generate: false
forced_bos_token_id: null
forced_eos_token_id: null
remove_invalid_values: false
exponential_decay_length_penalty: null
suppress_tokens: null
begin_suppress_tokens: null
architectures:
- BertForSequenceCla

In [44]:
torch.save(ce_model.model, './model_checkpoints/bertmultilingual/amberoad--bert-multilingual-passage-reranking-msmarco.pt')

In [21]:
def contrastive_criterion(x1, x2, label, margin: float = 1.0):
    """
    Computes Contrastive Loss
    """

    dist = torch.nn.functional.pairwise_distance(x1, x2)

    loss = (1 - label) * torch.pow(dist, 2) \
        + (label) * torch.pow(torch.clamp(margin - dist, min=0.0), 2)
    loss = torch.mean(loss)

    return loss

In [None]:
logits

---

In [50]:
import torch

model = torch.load('./model_checkpoints/bertregressor-finetune/epoch_6.pt')

In [51]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [52]:
import huggingface_hub as hf

hf.login(token='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')

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/nlplab/.cache/huggingface/token
Login successful


In [53]:
model.push_to_hub('kien-vu-uet/finetuned-bert-multilingual-passage-reranking-msmarco-regressor')

model.safetensors: 100%|██████████| 669M/669M [00:50<00:00, 13.3MB/s] 


CommitInfo(commit_url='https://huggingface.co/kien-vu-uet/finetuned-bert-multilingual-passage-reranking-msmarco-regressor/commit/c04cfa28bf6cf8c653964f8126b549d962b7632e', commit_message='Upload BertForSequenceClassification', commit_description='', oid='c04cfa28bf6cf8c653964f8126b549d962b7632e', pr_url=None, pr_revision=None, pr_num=None)

In [54]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("amberoad/bert-multilingual-passage-reranking-msmarco", cache_dir="../hf_cache")


In [55]:
tokenizer.push_to_hub('kien-vu-uet/finetuned-bert-multilingual-passage-reranking-msmarco-regressor')

CommitInfo(commit_url='https://huggingface.co/kien-vu-uet/finetuned-bert-multilingual-passage-reranking-msmarco-regressor/commit/6428f3db0fe2a00eae98747678b308030b68b33f', commit_message='Upload tokenizer', commit_description='', oid='6428f3db0fe2a00eae98747678b308030b68b33f', pr_url=None, pr_revision=None, pr_num=None)

: 

In [18]:
from sentence_transformers import CrossEncoder

ce_pipe = CrossEncoder("kien-vu-uet/finetuned-bert-multilingual-passage-reranking-msmarco", 
                       automodel_args={"cache_dir": '../hf_cache', "token": "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"})