In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
DATASET_CSABSTRUCT_DIR = './gdrive/Shareddrives/DATASETS/CSAbstruct/'
OUTPUT_MODEL_DIR = './gdrive/Shareddrives/MODELS/'

In [3]:
!pip install transformers -q
!pip install sentence_transformers -q

[K     |████████████████████████████████| 4.7 MB 35.0 MB/s 
[K     |████████████████████████████████| 596 kB 58.3 MB/s 
[K     |████████████████████████████████| 101 kB 12.8 MB/s 
[K     |████████████████████████████████| 6.6 MB 54.9 MB/s 
[K     |████████████████████████████████| 85 kB 4.8 MB/s 
[K     |████████████████████████████████| 1.3 MB 62.9 MB/s 
[?25h  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone


In [4]:
import pandas as pd

In [5]:
df_train = pd.read_parquet(DATASET_CSABSTRUCT_DIR + 'train.parquet')
df_dev = pd.read_parquet(DATASET_CSABSTRUCT_DIR + 'df_dev_triplets.parquet')
df_test = pd.read_parquet(DATASET_CSABSTRUCT_DIR + 'df_test_triplets.parquet')

In [6]:
df_train.columns

Index(['id', 'sentence', 'subject_label', 'label_id'], dtype='object')

In [7]:
df_dev.columns

Index(['anchor', 'positive', 'negative'], dtype='object')

In [8]:
# same text but with different labels
print(f'{len(df_train.index)}')
mask = df_train.groupby('sentence')['label_id'].transform('nunique') > 1
df_train = df_train[~mask].copy()
print(f'{len(df_train.index)}')

11333
11250


In [9]:
from sentence_transformers import InputExample
from tqdm import tqdm

train_set = []
guid = 1
for idx, row in tqdm(df_train.iterrows(), total=len(df_train.index)):
    train_set.append(InputExample(
        guid=guid,
        texts=[row['sentence']],
        label=row['label_id']
    ))
    guid += 1
len(train_set)

100%|██████████| 11250/11250 [00:00<00:00, 20740.51it/s]


11250

In [10]:
dev_set = []

guid = 1
for idx, row in tqdm(df_dev.iterrows(), total=len(df_dev.index)):
    dev_set.append(InputExample(
        guid=guid,
        texts=[row['anchor'], row['positive'], row['negative']],
    ))
    guid += 1
len(dev_set)

100%|██████████| 2024/2024 [00:00<00:00, 18959.76it/s]


2024

In [11]:
test_set = []

guid = 1
for idx, row in tqdm(df_test.iterrows(), total=len(df_test.index)):
    test_set.append(InputExample(
        guid=guid,
        texts=[row['anchor'], row['positive'], row['negative']],
    ))
    guid += 1
len(test_set)

100%|██████████| 1349/1349 [00:00<00:00, 18477.11it/s]


1349

In [12]:
import logging
from sentence_transformers import LoggingHandler

logging.basicConfig(
    format="%(asctime)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    handlers=[LoggingHandler()],
)

In [13]:
from datetime import datetime
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
model_file_name = model_name.split('/')[-1] + '_CSAbstruct_TripletAll'

train_batch_size = 32
output_path = (
    OUTPUT_MODEL_DIR
    + model_file_name
    + "-"
    + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
)
output_path

'./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04'

In [14]:
from sentence_transformers import models, SentenceTransformer

model = SentenceTransformer(model_name, device='cuda')

2022-08-08 12:11:05 - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [15]:
from sentence_transformers import datasets
from torch.utils.data import DataLoader

loader = DataLoader(train_set, shuffle=True, batch_size=train_batch_size)

In [16]:
from sentence_transformers import losses

train_loss = losses.BatchAllTripletLoss(model=model)

In [17]:
from sentence_transformers.evaluation import TripletEvaluator

dev_evaluator = TripletEvaluator.from_input_examples(
    dev_set, write_csv=True, show_progress_bar=True, name='csabstruct-dev'
)

In [18]:
logging.info("Performance before fine-tuning:")
dev_evaluator(model)

2022-08-08 12:11:35 - Performance before fine-tuning:
2022-08-08 12:11:35 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:11:45 - Accuracy Cosine Distance:   	52.03
2022-08-08 12:11:45 - Accuracy Manhattan Distance:	53.21
2022-08-08 12:11:45 - Accuracy Euclidean Distance:	52.03



0.5321146245059288

In [19]:
logging.info("Evaluating model on test set")
test_evaluator = TripletEvaluator.from_input_examples(
    test_set, write_csv=True, show_progress_bar=True, name='csabstruct-test'
)
test_evaluator(model)

2022-08-08 12:11:45 - Evaluating model on test set
2022-08-08 12:11:45 - TripletEvaluator: Evaluating the model on csabstruct-test dataset:


Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

2022-08-08 12:11:48 - Accuracy Cosine Distance:   	56.12
2022-08-08 12:11:48 - Accuracy Manhattan Distance:	55.23
2022-08-08 12:11:48 - Accuracy Euclidean Distance:	56.12



0.5611564121571534

In [20]:
num_epochs = 20

warmup_steps = int(len(loader) * num_epochs  * 0.1)  # 10% of train data

In [21]:
model_output_path = output_path + '/model'
checkpoint_output_path = output_path + '/checkpoint'

print(model_output_path)
print(checkpoint_output_path)

./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/model
./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint


In [22]:
%%time
model.fit(
    train_objectives=[(loader, train_loss)],
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path=model_output_path,
    show_progress_bar=True,
    evaluator=dev_evaluator,
    save_best_model=True,
    checkpoint_save_total_limit=1,
    checkpoint_path=checkpoint_output_path    
)  

Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:12:14 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 0:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:12:18 - Accuracy Cosine Distance:   	62.06
2022-08-08 12:12:18 - Accuracy Manhattan Distance:	61.41
2022-08-08 12:12:18 - Accuracy Euclidean Distance:	62.06

2022-08-08 12:12:18 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/model


Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:12:27 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/500
2022-08-08 12:12:41 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 1:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:12:44 - Accuracy Cosine Distance:   	61.31
2022-08-08 12:12:44 - Accuracy Manhattan Distance:	60.97
2022-08-08 12:12:44 - Accuracy Euclidean Distance:	61.31



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:13:03 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/1000
2022-08-08 12:13:07 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 2:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:13:11 - Accuracy Cosine Distance:   	63.34
2022-08-08 12:13:11 - Accuracy Manhattan Distance:	63.14
2022-08-08 12:13:11 - Accuracy Euclidean Distance:	63.34

2022-08-08 12:13:11 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/model


Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:13:34 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 3:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:13:38 - Accuracy Cosine Distance:   	63.14
2022-08-08 12:13:38 - Accuracy Manhattan Distance:	62.50
2022-08-08 12:13:38 - Accuracy Euclidean Distance:	63.14



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:13:43 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/1500
2022-08-08 12:14:01 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 4:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:14:05 - Accuracy Cosine Distance:   	64.03
2022-08-08 12:14:05 - Accuracy Manhattan Distance:	63.49
2022-08-08 12:14:05 - Accuracy Euclidean Distance:	64.03

2022-08-08 12:14:05 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/model


Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:14:21 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/2000
2022-08-08 12:14:29 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 5:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:14:33 - Accuracy Cosine Distance:   	63.74
2022-08-08 12:14:33 - Accuracy Manhattan Distance:	62.75
2022-08-08 12:14:33 - Accuracy Euclidean Distance:	63.74



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:14:56 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 6:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:14:59 - Accuracy Cosine Distance:   	63.49
2022-08-08 12:14:59 - Accuracy Manhattan Distance:	62.55
2022-08-08 12:14:59 - Accuracy Euclidean Distance:	63.49



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:15:02 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/2500
2022-08-08 12:15:23 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 7:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:15:27 - Accuracy Cosine Distance:   	63.49
2022-08-08 12:15:27 - Accuracy Manhattan Distance:	62.25
2022-08-08 12:15:27 - Accuracy Euclidean Distance:	63.49



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:15:39 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/3000
2022-08-08 12:15:50 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 8:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:15:54 - Accuracy Cosine Distance:   	63.34
2022-08-08 12:15:54 - Accuracy Manhattan Distance:	62.65
2022-08-08 12:15:54 - Accuracy Euclidean Distance:	63.34



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:16:16 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/3500
2022-08-08 12:16:18 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 9:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:16:21 - Accuracy Cosine Distance:   	63.29
2022-08-08 12:16:21 - Accuracy Manhattan Distance:	61.76
2022-08-08 12:16:21 - Accuracy Euclidean Distance:	63.29



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:16:45 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 10:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:16:48 - Accuracy Cosine Distance:   	63.64
2022-08-08 12:16:48 - Accuracy Manhattan Distance:	62.60
2022-08-08 12:16:48 - Accuracy Euclidean Distance:	63.64



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:16:57 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/4000
2022-08-08 12:17:12 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 11:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:17:16 - Accuracy Cosine Distance:   	63.19
2022-08-08 12:17:16 - Accuracy Manhattan Distance:	63.44
2022-08-08 12:17:16 - Accuracy Euclidean Distance:	63.19



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:17:34 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/4500
2022-08-08 12:17:39 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 12:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:17:43 - Accuracy Cosine Distance:   	62.94
2022-08-08 12:17:43 - Accuracy Manhattan Distance:	62.75
2022-08-08 12:17:43 - Accuracy Euclidean Distance:	62.94



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:18:06 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 13:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:18:10 - Accuracy Cosine Distance:   	63.88
2022-08-08 12:18:10 - Accuracy Manhattan Distance:	63.14
2022-08-08 12:18:10 - Accuracy Euclidean Distance:	63.88



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:18:15 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/5000
2022-08-08 12:18:34 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 14:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:18:37 - Accuracy Cosine Distance:   	63.54
2022-08-08 12:18:37 - Accuracy Manhattan Distance:	62.60
2022-08-08 12:18:37 - Accuracy Euclidean Distance:	63.54



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:18:51 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/5500
2022-08-08 12:19:01 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 15:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:19:05 - Accuracy Cosine Distance:   	63.09
2022-08-08 12:19:05 - Accuracy Manhattan Distance:	62.99
2022-08-08 12:19:05 - Accuracy Euclidean Distance:	63.09



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:19:28 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 16:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:19:33 - Accuracy Cosine Distance:   	63.04
2022-08-08 12:19:33 - Accuracy Manhattan Distance:	62.80
2022-08-08 12:19:33 - Accuracy Euclidean Distance:	63.04



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:19:34 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/6000
2022-08-08 12:19:57 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 17:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:20:00 - Accuracy Cosine Distance:   	62.94
2022-08-08 12:20:00 - Accuracy Manhattan Distance:	62.45
2022-08-08 12:20:00 - Accuracy Euclidean Distance:	62.94



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:20:11 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/6500
2022-08-08 12:20:24 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 18:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:20:27 - Accuracy Cosine Distance:   	63.29
2022-08-08 12:20:27 - Accuracy Manhattan Distance:	62.85
2022-08-08 12:20:27 - Accuracy Euclidean Distance:	63.29



Iteration:   0%|          | 0/352 [00:00<?, ?it/s]

2022-08-08 12:20:48 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/7000
2022-08-08 12:20:51 - TripletEvaluator: Evaluating the model on csabstruct-dev dataset after epoch 19:


Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

Batches:   0%|          | 0/127 [00:00<?, ?it/s]

2022-08-08 12:20:54 - Accuracy Cosine Distance:   	62.99
2022-08-08 12:20:54 - Accuracy Manhattan Distance:	62.30
2022-08-08 12:20:54 - Accuracy Euclidean Distance:	62.99

2022-08-08 12:20:54 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04/checkpoint/7040
CPU times: user 8min 10s, sys: 46.1 s, total: 8min 56s
Wall time: 9min 6s


In [23]:
logging.info("Evaluating model on test set")
test_evaluator = TripletEvaluator.from_input_examples(
    test_set, write_csv=True, show_progress_bar=True, name='csabstruct-test'
)
model.evaluate(test_evaluator)

2022-08-08 12:20:55 - Evaluating model on test set
2022-08-08 12:20:55 - TripletEvaluator: Evaluating the model on csabstruct-test dataset:


Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Batches:   0%|          | 0/85 [00:00<?, ?it/s]

2022-08-08 12:20:57 - Accuracy Cosine Distance:   	71.31
2022-08-08 12:20:57 - Accuracy Manhattan Distance:	69.90
2022-08-08 12:20:57 - Accuracy Euclidean Distance:	71.31



0.7131208302446257

In [24]:
output_path + "_final"

'./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04_final'

In [25]:
model.save(output_path + "_final")

2022-08-08 12:20:57 - Save model to ./gdrive/Shareddrives/MODELS/all-MiniLM-L6-v2_CSAbstruct_TripletAll-2022-08-08_12-11-04_final
