In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
DATASET_CSABSTRUCT_DIR = './gdrive/Shareddrives/DATASETS/CSAbstruct/'
OUTPUT_MODEL_DIR = './gdrive/Shareddrives/MODELS/'

In [3]:
!pip install transformers -q
!pip install sentence_transformers -q

[K     |████████████████████████████████| 4.7 MB 6.9 MB/s 
[K     |████████████████████████████████| 6.6 MB 45.7 MB/s 
[K     |████████████████████████████████| 101 kB 12.4 MB/s 
[K     |████████████████████████████████| 596 kB 70.2 MB/s 
[K     |████████████████████████████████| 85 kB 3.9 MB/s 
[K     |████████████████████████████████| 1.3 MB 18.0 MB/s 
[?25h  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone


In [4]:
import pandas as pd

In [5]:
df_train = pd.read_parquet(DATASET_CSABSTRUCT_DIR + 'train.parquet')
df_dev = pd.read_parquet(DATASET_CSABSTRUCT_DIR + 'df_dev_triplets.parquet')
df_test = pd.read_parquet(DATASET_CSABSTRUCT_DIR + 'df_test_triplets.parquet')

In [6]:
df_train.columns

Index(['id', 'sentence', 'subject_label', 'label_id'], dtype='object')

In [7]:
df_dev.columns

Index(['anchor', 'positive', 'negative'], dtype='object')

In [8]:
# same text but with different labels
print(f'{len(df_train.index)}')
mask = df_train.groupby('sentence')['label_id'].transform('nunique') > 1
df_train = df_train[~mask].copy()
print(f'{len(df_train.index)}')

11333
11250


In [9]:
from sentence_transformers import InputExample
from tqdm import tqdm

train_set = []
guid = 1
for idx, row in tqdm(df_train.iterrows(), total=len(df_train.index)):
    train_set.append(InputExample(
        guid=guid,
        texts=[row['sentence']],
        label=row['label_id']
    ))
    guid += 1
len(train_set)

100%|██████████| 11250/11250 [00:01<00:00, 10057.65it/s]


11250

In [10]:
dev_set = []

guid = 1
for idx, row in tqdm(df_dev.iterrows(), total=len(df_dev.index)):
    dev_set.append(InputExample(
        guid=guid,
        texts=[row['anchor'], row['positive'], row['negative']],
    ))
    guid += 1
len(dev_set)

100%|██████████| 2024/2024 [00:00<00:00, 9268.81it/s]


2024

In [11]:
test_set = []

guid = 1
for idx, row in tqdm(df_test.iterrows(), total=len(df_test.index)):
    test_set.append(InputExample(
        guid=guid,
        texts=[row['anchor'], row['positive'], row['negative']],
    ))
    guid += 1
len(test_set)

100%|██████████| 1349/1349 [00:00<00:00, 4850.42it/s]


1349

In [12]:
import logging
from sentence_transformers import LoggingHandler

logging.basicConfig(
    format="%(asctime)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    handlers=[LoggingHandler()],
)

In [13]:
from datetime import datetime
model_name = 'allenai/scibert_scivocab_uncased'
model_file_name = model_name.split('/')[-1] + '_CSAbstruct_TripletAll'

train_batch_size = 32
output_path = (
    OUTPUT_MODEL_DIR
    + model_file_name
    + "-"
    + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
)
output_path

'./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03'

In [14]:
from sentence_transformers import models, SentenceTransformer

bert = models.Transformer(model_name)
pooler = models.Pooling(
    bert.get_word_embedding_dimension(),
    pooling_mode_mean_tokens=True
)

model = SentenceTransformer(modules=[bert, pooler])

Downloading config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/422M [00:00<?, ?B/s]

Some weights of the model checkpoint at allenai/scibert_scivocab_uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading vocab.txt:   0%|          | 0.00/223k [00:00<?, ?B/s]

2022-08-08 12:07:20 - Use pytorch device: cuda


In [15]:
from sentence_transformers import datasets
from torch.utils.data import DataLoader

loader = DataLoader(train_set, shuffle=True, batch_size=train_batch_size)

In [16]:
from sentence_transformers import losses

train_loss = losses.BatchAllTripletLoss(model=model)

In [17]:
from sentence_transformers.evaluation import TripletEvaluator

dev_evaluator = TripletEvaluator.from_input_examples(
    dev_set, write_csv=True, show_progress_bar=True, name='csabstruct-dev'
)

In [18]:
logging.info("Performance before fine-tuning:")
dev_evaluator(model)

2022-08-08 12:07:20 - Performance before fine-tuning:
2022-08-08 12:07:20 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:07:36 - Accuracy Cosine Distance:   	55.04
2022-08-08 12:07:36 - Accuracy Manhattan Distance:	54.69
2022-08-08 12:07:36 - Accuracy Euclidean Distance:	54.59



0.5503952569169961

In [19]:
logging.info("Evaluating model on test set")
test_evaluator = TripletEvaluator.from_input_examples(
    test_set, write_csv=True, show_progress_bar=True, name='csabstruct-test'
)
test_evaluator(model)

2022-08-08 12:07:36 - Evaluating model on test set
2022-08-08 12:07:36 - TripletEvaluator: Evaluating the model on csabstruct-test dataset:


Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

2022-08-08 12:07:42 - Accuracy Cosine Distance:   	57.52
2022-08-08 12:07:42 - Accuracy Manhattan Distance:	56.93
2022-08-08 12:07:42 - Accuracy Euclidean Distance:	57.15



0.575240919199407

In [20]:
num_epochs = 20

warmup_steps = int(len(loader) * num_epochs  * 0.1)  # 10% of train data

In [21]:
model_output_path = output_path + '/model'
checkpoint_output_path = output_path + '/checkpoint'

print(model_output_path)
print(checkpoint_output_path)

./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/model
./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint


In [23]:
%%time
model.fit(
    train_objectives=[(loader, train_loss)],
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path=model_output_path,
    show_progress_bar=True,
    evaluator=dev_evaluator,
    save_best_model=True,
    checkpoint_save_total_limit=1,
    checkpoint_path=checkpoint_output_path    
)  

Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:09:25 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 0:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:09:34 - Accuracy Cosine Distance:   	66.60
2022-08-08 12:09:34 - Accuracy Manhattan Distance:	67.14
2022-08-08 12:09:34 - Accuracy Euclidean Distance:	67.29

2022-08-08 12:09:34 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/model


Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:10:10 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/500
2022-08-08 12:10:57 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 1:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:11:06 - Accuracy Cosine Distance:   	68.97
2022-08-08 12:11:06 - Accuracy Manhattan Distance:	68.77
2022-08-08 12:11:06 - Accuracy Euclidean Distance:	68.82

2022-08-08 12:11:06 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/model


Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:12:15 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/1000
2022-08-08 12:12:28 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 2:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:12:38 - Accuracy Cosine Distance:   	68.73
2022-08-08 12:12:38 - Accuracy Manhattan Distance:	69.22
2022-08-08 12:12:38 - Accuracy Euclidean Distance:	68.82

2022-08-08 12:12:38 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/model


Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:13:59 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 3:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:14:08 - Accuracy Cosine Distance:   	68.77
2022-08-08 12:14:08 - Accuracy Manhattan Distance:	68.48
2022-08-08 12:14:08 - Accuracy Euclidean Distance:	68.77



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:14:29 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/1500
2022-08-08 12:15:29 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 4:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:15:39 - Accuracy Cosine Distance:   	67.84
2022-08-08 12:15:39 - Accuracy Manhattan Distance:	67.69
2022-08-08 12:15:39 - Accuracy Euclidean Distance:	67.49



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:16:33 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/2000
2022-08-08 12:16:58 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 5:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:17:08 - Accuracy Cosine Distance:   	67.34
2022-08-08 12:17:08 - Accuracy Manhattan Distance:	67.64
2022-08-08 12:17:08 - Accuracy Euclidean Distance:	67.54



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:18:26 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 6:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:18:35 - Accuracy Cosine Distance:   	65.37
2022-08-08 12:18:35 - Accuracy Manhattan Distance:	65.27
2022-08-08 12:18:35 - Accuracy Euclidean Distance:	65.27



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:18:43 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/2500
2022-08-08 12:19:56 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 7:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:20:05 - Accuracy Cosine Distance:   	67.05
2022-08-08 12:20:05 - Accuracy Manhattan Distance:	66.45
2022-08-08 12:20:05 - Accuracy Euclidean Distance:	66.75



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:20:47 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/3000
2022-08-08 12:21:26 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 8:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:21:36 - Accuracy Cosine Distance:   	64.43
2022-08-08 12:21:36 - Accuracy Manhattan Distance:	63.88
2022-08-08 12:21:36 - Accuracy Euclidean Distance:	63.83



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:22:50 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/3500
2022-08-08 12:22:57 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 9:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:23:07 - Accuracy Cosine Distance:   	63.14
2022-08-08 12:23:07 - Accuracy Manhattan Distance:	62.10
2022-08-08 12:23:07 - Accuracy Euclidean Distance:	62.65



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:24:26 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 10:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:24:35 - Accuracy Cosine Distance:   	63.78
2022-08-08 12:24:35 - Accuracy Manhattan Distance:	64.03
2022-08-08 12:24:35 - Accuracy Euclidean Distance:	64.13



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:25:05 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/4000
2022-08-08 12:25:57 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 11:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:26:06 - Accuracy Cosine Distance:   	64.08
2022-08-08 12:26:06 - Accuracy Manhattan Distance:	63.59
2022-08-08 12:26:06 - Accuracy Euclidean Distance:	63.98



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:27:08 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/4500
2022-08-08 12:27:27 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 12:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:27:37 - Accuracy Cosine Distance:   	64.43
2022-08-08 12:27:37 - Accuracy Manhattan Distance:	64.18
2022-08-08 12:27:37 - Accuracy Euclidean Distance:	64.43



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:28:56 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 13:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:29:05 - Accuracy Cosine Distance:   	63.29
2022-08-08 12:29:05 - Accuracy Manhattan Distance:	63.29
2022-08-08 12:29:05 - Accuracy Euclidean Distance:	63.59



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:29:21 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/5000
2022-08-08 12:30:25 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 14:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:30:35 - Accuracy Cosine Distance:   	63.78
2022-08-08 12:30:35 - Accuracy Manhattan Distance:	62.75
2022-08-08 12:30:35 - Accuracy Euclidean Distance:	63.19



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:31:23 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/5500
2022-08-08 12:31:55 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 15:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:32:05 - Accuracy Cosine Distance:   	62.60
2022-08-08 12:32:05 - Accuracy Manhattan Distance:	62.01
2022-08-08 12:32:05 - Accuracy Euclidean Distance:	62.45



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:33:23 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 16:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:33:32 - Accuracy Cosine Distance:   	62.90
2022-08-08 12:33:32 - Accuracy Manhattan Distance:	62.75
2022-08-08 12:33:32 - Accuracy Euclidean Distance:	62.65



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:33:36 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/6000
2022-08-08 12:34:52 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 17:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:35:02 - Accuracy Cosine Distance:   	62.99
2022-08-08 12:35:02 - Accuracy Manhattan Distance:	62.90
2022-08-08 12:35:02 - Accuracy Euclidean Distance:	63.54



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:35:39 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/6500
2022-08-08 12:36:22 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 18:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:36:32 - Accuracy Cosine Distance:   	63.83
2022-08-08 12:36:32 - Accuracy Manhattan Distance:	63.74
2022-08-08 12:36:32 - Accuracy Euclidean Distance:	63.88



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:37:41 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/7000
2022-08-08 12:37:53 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 19:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:38:03 - Accuracy Cosine Distance:   	63.69
2022-08-08 12:38:03 - Accuracy Manhattan Distance:	63.39
2022-08-08 12:38:03 - Accuracy Euclidean Distance:	63.44

2022-08-08 12:38:03 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03/checkpoint/7040
CPU times: user 23min 30s, sys: 5min 58s, total: 29min 28s
Wall time: 30min 3s


In [24]:
logging.info("Evaluating model on test set")
test_evaluator = TripletEvaluator.from_input_examples(
    test_set, write_csv=True, show_progress_bar=True, name='csabstruct-test'
)
model.evaluate(test_evaluator)

2022-08-08 12:38:04 - Evaluating model on test set
2022-08-08 12:38:04 - TripletEvaluator: Evaluating the model on csabstruct-test dataset:


Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

2022-08-08 12:38:11 - Accuracy Cosine Distance:   	75.76
2022-08-08 12:38:11 - Accuracy Manhattan Distance:	76.65
2022-08-08 12:38:11 - Accuracy Euclidean Distance:	76.28



0.7664936990363233

In [25]:
output_path + "_final"

'./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03_final'

In [26]:
model.save(output_path + "_final")

2022-08-08 12:38:11 - Save model to ./gdrive/Shareddrives/MODELS/scibert_scivocab_uncased_CSAbstruct_TripletAll-2022-08-08_12-07-03_final
