In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#install required libraries
!pip install pytorch-lightning
!pip install transformers

Collecting pytorch-lightning
  Downloading pytorch_lightning-1.5.2-py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 11.0 MB/s 
Collecting fsspec[http]!=2021.06.0,>=2021.05.0
  Downloading fsspec-2021.11.0-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 49.9 MB/s 
[?25hCollecting pyDeprecate==0.3.1
  Downloading pyDeprecate-0.3.1-py3-none-any.whl (10 kB)
Collecting PyYAML>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 45.3 MB/s 
Collecting future>=0.17.1
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 43.3 MB/s 
[?25hCollecting torchmetrics>=0.4.1
  Downloading torchmetrics-0.6.0-py3-none-any.whl (329 kB)
[K     |████████████████████████████████| 329 kB 50.0 MB/s 
[?25hCollecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.

In [None]:
#import dependencies
import os
import copy
import time
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

import torch
import transformers
import tokenizers

from imblearn.over_sampling import RandomOverSampler


import torch
import torchmetrics
from torch.utils.data import DataLoader

import pytorch_lightning as pl
from pytorch_lightning import seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
def get_msr_data(paraphrase_data_path, data_part):
  # This function reads the MRPC dataset
  with open(os.path.join(paraphrase_data_path, data_part), 'r') as f:
    data = f.read()

  sentence_1_data = [el.split('\t')[3:][0] for el in data.split('\n')[1:-1]]
  sentence_2_data = [el.split('\t')[3:][1] for el in data.split('\n')[1:-1]]
  targets = [int(el.split('\t')[0][0]) for el in data.split('\n')[1:-1]]
  return sentence_1_data, sentence_2_data, targets

In [None]:
def my_collate(batch):
  batch = filter(lambda x: x is not None, batch)
  return torch.utils.data.dataloader.default_collate(list(batch))

In [None]:
class BERTMSRDataset:
  # This class returns a processed data sample by index
  def __init__(self, sent_1, sent_2, targets):
    self.sent_1 = sent_1
    self.sent_2 = sent_2
    self.targets = targets
    self.sent_len = len(sent_1)

  def __len__(self):
    return self.sent_len

  def __getitem__(self, idx):

    target = torch.FloatTensor([self.targets[idx]])
    s1 = " ".join(self.sent_1[idx].split()).lower() 
    s2 = " ".join(self.sent_2[idx].split()).lower() # remove redundant spaces in a sentence

    inputs = TOKENIZER.encode_plus(s1, s2, add_special_tokens=True, max_length=MAX_LEN, padding='max_length') # encode sentences

    ids = torch.LongTensor(inputs['input_ids']) # a padded vector of encoded words
    token_type_ids = torch.LongTensor(inputs['token_type_ids']) # a mask that separates one sentence from the other
    mask = torch.LongTensor(inputs['attention_mask']) # a mask that highlights what part of the token ids the model needs to attend

    # data format: SENTENCE1 [SEP] SENTENCE2

    return {
        "ids": ids,
        "token_type_ids": token_type_ids,
        "mask": mask,
        "target": target,
        "sent": self.sent_1[idx] + '[SEP]' + self.sent_2[idx]
    }

In [None]:
class BERTModel(pl.LightningModule):
  def __init__(self, conf, learning_rate=1e-4):
    super().__init__()
    self.bert = transformers.BertModel.from_pretrained('bert-base-uncased', config=conf) # load pretrained bert model
    self.drop = torch.nn.Dropout(0.5) # add regularization
    self.out = torch.nn.Linear(self.bert.config.hidden_size, 1) # add a classification layer

    self.loss = torch.nn.BCELoss()

    self.accuracy_t = torchmetrics.Accuracy()
    self.accuracy_v = torchmetrics.Accuracy()
    self.learning_rate = learning_rate

  def forward(self, ids, token_type_ids, mask, labels=None):
    out = self.bert(input_ids=ids, token_type_ids=token_type_ids, attention_mask=mask)['pooler_output']
    out = self.drop(out)
    out = self.out(out)
    out = torch.sigmoid(out) # pass the output of the model through the sigmoid function
    
    return out

  def configure_optimizers(self):
    optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=20, eta_min=1e-5)
    return [optimizer], [lr_scheduler]

  def training_step(self, train_batch, batch_idx):
    ids, token_type_ids, mask, target, sent = train_batch.values()
    y = target.to(torch.float)
    y_hat = self.forward(ids, token_type_ids, mask)
    loss = self.loss(y_hat, y)
    preds = (y_hat > 0.5).to(torch.int)
    self.log('train_loss', loss)
    self.log('train_acc_step', self.accuracy_t(preds, y.to(torch.int)))
    print(batch_idx, self.accuracy_t.compute())
    return loss

  def validation_step(self, val_batch, batch_idx):
    ids, token_type_ids, mask, target, sent = val_batch.values()
    y = target.to(torch.float)
    y_hat = self.forward(ids, token_type_ids, mask)
    loss = self.loss(y_hat, y)
    preds = (y_hat > 0.5).to(torch.int)
    self.log('valid_loss', loss)
    self.log('valid_acc_step', self.accuracy_v(preds, y.to(torch.int)))
    return loss
  
  def validation_epoch_end(self, validation_step_outputs):
    self.log('valid_acc_epoch', self.accuracy_v.compute())
    print('valid_acc_epoch', self.accuracy_v.compute())

  def training_epoch_end(self, outs):
    self.log('train_acc_epoch', self.accuracy_t.compute())
    print('train_acc_epoch', self.accuracy_t.compute())

In [None]:
def evaluate_model(bert):
  # model evaluation stage
  bert.eval()
  c = 0
  targ, pred = [], []
  for inputs in dl_test:
    targ += list(inputs['target'].detach().cpu().numpy().squeeze(1))
    with torch.set_grad_enabled(False):
      outputs, loss = bert(inputs['ids'].to(device), inputs['token_type_ids'].to(device), inputs['mask'].to(device), labels=inputs['target'].to(device))
    pred += list((outputs.detach().cpu().squeeze(1).numpy() > 0.5).astype(int))
    c+=1
    print(f'step: {c}, loss: {loss}')

  targ = np.array(list(map(int, targ)))
  pred = np.array(pred)
  acc = np.sum(targ == pred)/len(targ)
  conf_matrix = confusion_matrix(targ, pred)

  print('Accuracy:', acc)
  print('Confusion Matrix:')
  print(conf_matrix)

  return acc, conf_matrix

In [None]:
def train_model(model, optimizer, scheduler):
  # trains the model for one epoch
  model.train()

  running_loss = 0.0
  running_corrects = 0
  total_steps = dataset_sizes['train'] // BS + 1
  for step, inputs in enumerate(dataloaders['train']):
    ids = inputs['ids'].to(device)
    mask = inputs['mask'].to(device)
    token_type_ids = inputs['token_type_ids'].to(device)
    labels = inputs['target'].to(device)

    optimizer.zero_grad()

    with torch.set_grad_enabled(True):
      outputs, loss = model(ids, token_type_ids, mask, labels)
      preds = outputs.detach().cpu().numpy() > 0.5
      loss.backward()
      optimizer.step()
      scheduler.step()
      torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
      print('step: {}/{} Loss: {:.4f}'.format(step+1, total_steps, loss))

    # statistics
    running_loss += loss.item() * len(inputs)
    running_corrects += np.sum(preds == labels.detach().cpu().numpy())           
    scheduler.step()
    epoch_loss = running_loss / dataset_sizes['train']
    epoch_acc = running_corrects / dataset_sizes['train']

    print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
  return model

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
paraphrase_data_path = '/content/drive/MyDrive/msr_paraphrase'
pawsx_data_paths_train = ['/content/final/train.tsv']#, '/content/final/dev.tsv']
pawsx_data_paths_test = '/content/final/test.tsv'

EPOCHS = 4
BS = 64
WARMUP_STEPS = 0
MAX_LEN = 128 # maximal length of a sentence
LR = 2e-5
TOKENIZER = transformers.BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
model_config = transformers.BertConfig.from_pretrained('bert-base-uncased')
model_config.output_hidden_states = True
bert = BERTModel(model_config).to(device)

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
paraphrase_data_path = '/content/drive/MyDrive/msr_paraphrase'
train_1, train_2, train_targets = get_msr_data(paraphrase_data_path, 'msr_paraphrase_train.txt')
test_1, test_2, test_targets = get_msr_data(paraphrase_data_path, 'msr_paraphrase_test.txt')
train_1_2 = np.array([[el1, el2] for el1, el2 in zip(train_1, train_2)])
test_1_2 = np.array([[el1, el2] for el1, el2 in zip(test_1, test_2)])

ros = RandomOverSampler(random_state=0, sampling_strategy='minority')
train_1_2, train_targets = ros.fit_resample(train_1_2, train_targets)
ros = RandomOverSampler(random_state=0, sampling_strategy='minority')
test_1_2, test_targets = ros.fit_resample(test_1_2, test_targets)

test_1, test_2 = list(test_1_2[:,0]), list(test_1_2[:,1])
train_1, train_2 = list(train_1_2[:,0]), list(train_1_2[:,1]) # get sentences and targets
#df_train = pd.concat([pd.read_csv(path, sep='\t') for path in pawsx_data_paths_train])
#df_test = pd.read_csv(pawsx_data_paths_test, sep='\t')

In [None]:
ds_train = BERTMSRDataset(train_1, train_2, train_targets)
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=BS, shuffle=True, num_workers=2, worker_init_fn=np.random.seed(0), collate_fn=my_collate) # create torch dataloader

ds_test = BERTMSRDataset(test_1, test_2, test_targets)
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=BS, shuffle=True, num_workers=2, worker_init_fn=np.random.seed(0), collate_fn=my_collate)

dataloaders = {'train': dl_train, 'val': dl_test}
dataset_sizes = {'train': len(ds_train), 'val': len(ds_test)}

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
early_stop_callback = EarlyStopping(monitor="valid_acc_epoch", min_delta=0.00, patience=4, verbose=False, mode="max")
checkpoint_callback = ModelCheckpoint(
    monitor="valid_acc_epoch",
    dirpath="/content/drive/MyDrive/text_similarity",
    filename="bert-{epoch:02d}-{valid_acc_epoch:.2f}",
    save_top_k=1,
    mode="max",
)

# training
trainer = pl.Trainer(gpus=1, callbacks=[early_stop_callback, checkpoint_callback], max_epochs = 15, check_val_every_n_epoch=1)
trainer.fit(bert, dl_train, dl_test)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type      | Params
-----------------------------------------
0 | bert       | BertModel | 109 M 
1 | drop       | Dropout   | 0     
2 | out        | Linear    | 769   
3 | loss       | BCELoss   | 0     
4 | accuracy_t | Accuracy  | 0     
5 | accuracy_v | Accuracy  | 0     
-----------------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
437.932   Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

  f"Your `{mode.dataloader_prefix}_dataloader` has `shuffle=True`,"
  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


valid_acc_epoch tensor(0.4844, device='cuda:0')


Training: 0it [00:00, ?it/s]

0 tensor(0.6094, device='cuda:0')
1 tensor(0.5781, device='cuda:0')
2 tensor(0.5469, device='cuda:0')
3 tensor(0.5352, device='cuda:0')
4 tensor(0.5188, device='cuda:0')
5 tensor(0.5208, device='cuda:0')
6 tensor(0.5379, device='cuda:0')
7 tensor(0.5430, device='cuda:0')
8 tensor(0.5434, device='cuda:0')
9 tensor(0.5484, device='cuda:0')
10 tensor(0.5469, device='cuda:0')
11 tensor(0.5391, device='cuda:0')
12 tensor(0.5385, device='cuda:0')
13 tensor(0.5346, device='cuda:0')
14 tensor(0.5375, device='cuda:0')
15 tensor(0.5391, device='cuda:0')
16 tensor(0.5423, device='cuda:0')
17 tensor(0.5443, device='cuda:0')
18 tensor(0.5461, device='cuda:0')
19 tensor(0.5406, device='cuda:0')
20 tensor(0.5379, device='cuda:0')
21 tensor(0.5419, device='cuda:0')
22 tensor(0.5442, device='cuda:0')
23 tensor(0.5449, device='cuda:0')
24 tensor(0.5469, device='cuda:0')
25 tensor(0.5511, device='cuda:0')
26 tensor(0.5573, device='cuda:0')
27 tensor(0.5580, device='cuda:0')
28 tensor(0.5582, device='cuda

  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


86 tensor(0.6664, device='cuda:0')


Validating: 0it [00:00, ?it/s]

  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


valid_acc_epoch tensor(0.7746, device='cuda:0')
train_acc_epoch tensor(0.6664, device='cuda:0')
0 tensor(0.6686, device='cuda:0')
1 tensor(0.6699, device='cuda:0')
2 tensor(0.6711, device='cuda:0')
3 tensor(0.6730, device='cuda:0')
4 tensor(0.6752, device='cuda:0')
5 tensor(0.6771, device='cuda:0')
6 tensor(0.6787, device='cuda:0')
7 tensor(0.6808, device='cuda:0')
8 tensor(0.6823, device='cuda:0')
9 tensor(0.6847, device='cuda:0')
10 tensor(0.6873, device='cuda:0')
11 tensor(0.6890, device='cuda:0')
12 tensor(0.6914, device='cuda:0')
13 tensor(0.6928, device='cuda:0')
14 tensor(0.6944, device='cuda:0')
15 tensor(0.6966, device='cuda:0')
16 tensor(0.6990, device='cuda:0')
17 tensor(0.7010, device='cuda:0')
18 tensor(0.7032, device='cuda:0')
19 tensor(0.7053, device='cuda:0')
20 tensor(0.7070, device='cuda:0')
21 tensor(0.7089, device='cuda:0')
22 tensor(0.7099, device='cuda:0')
23 tensor(0.7112, device='cuda:0')
24 tensor(0.7126, device='cuda:0')
25 tensor(0.7141, device='cuda:0')
26 t

Validating: 0it [00:00, ?it/s]

valid_acc_epoch tensor(0.6917, device='cuda:0')
train_acc_epoch tensor(0.7818, device='cuda:0')
0 tensor(0.7820, device='cuda:0')
1 tensor(0.7821, device='cuda:0')
2 tensor(0.7828, device='cuda:0')
3 tensor(0.7832, device='cuda:0')
4 tensor(0.7838, device='cuda:0')
5 tensor(0.7845, device='cuda:0')
6 tensor(0.7850, device='cuda:0')
7 tensor(0.7860, device='cuda:0')
8 tensor(0.7867, device='cuda:0')
9 tensor(0.7870, device='cuda:0')
10 tensor(0.7879, device='cuda:0')
11 tensor(0.7889, device='cuda:0')
12 tensor(0.7895, device='cuda:0')
13 tensor(0.7901, device='cuda:0')
14 tensor(0.7910, device='cuda:0')
15 tensor(0.7919, device='cuda:0')
16 tensor(0.7927, device='cuda:0')
17 tensor(0.7937, device='cuda:0')
18 tensor(0.7947, device='cuda:0')
19 tensor(0.7957, device='cuda:0')
20 tensor(0.7964, device='cuda:0')
21 tensor(0.7973, device='cuda:0')
22 tensor(0.7980, device='cuda:0')
23 tensor(0.7989, device='cuda:0')
24 tensor(0.7996, device='cuda:0')
25 tensor(0.8006, device='cuda:0')
26 t

Validating: 0it [00:00, ?it/s]

valid_acc_epoch tensor(0.6800, device='cuda:0')
train_acc_epoch tensor(0.8384, device='cuda:0')
0 tensor(0.8388, device='cuda:0')
1 tensor(0.8389, device='cuda:0')
2 tensor(0.8393, device='cuda:0')
3 tensor(0.8398, device='cuda:0')
4 tensor(0.8404, device='cuda:0')
5 tensor(0.8409, device='cuda:0')
6 tensor(0.8415, device='cuda:0')
7 tensor(0.8421, device='cuda:0')
8 tensor(0.8426, device='cuda:0')
9 tensor(0.8432, device='cuda:0')
10 tensor(0.8436, device='cuda:0')
11 tensor(0.8442, device='cuda:0')
12 tensor(0.8447, device='cuda:0')
13 tensor(0.8452, device='cuda:0')
14 tensor(0.8457, device='cuda:0')
15 tensor(0.8463, device='cuda:0')
16 tensor(0.8468, device='cuda:0')
17 tensor(0.8473, device='cuda:0')
18 tensor(0.8479, device='cuda:0')
19 tensor(0.8484, device='cuda:0')
20 tensor(0.8489, device='cuda:0')
21 tensor(0.8494, device='cuda:0')
22 tensor(0.8500, device='cuda:0')
23 tensor(0.8504, device='cuda:0')
24 tensor(0.8509, device='cuda:0')
25 tensor(0.8514, device='cuda:0')
26 t

Validating: 0it [00:00, ?it/s]

valid_acc_epoch tensor(0.7074, device='cuda:0')
train_acc_epoch tensor(0.8740, device='cuda:0')
0 tensor(0.8743, device='cuda:0')
1 tensor(0.8746, device='cuda:0')
2 tensor(0.8750, device='cuda:0')
3 tensor(0.8754, device='cuda:0')
4 tensor(0.8757, device='cuda:0')
5 tensor(0.8761, device='cuda:0')
6 tensor(0.8764, device='cuda:0')
7 tensor(0.8767, device='cuda:0')
8 tensor(0.8771, device='cuda:0')
9 tensor(0.8774, device='cuda:0')
10 tensor(0.8777, device='cuda:0')
11 tensor(0.8781, device='cuda:0')
12 tensor(0.8784, device='cuda:0')
13 tensor(0.8787, device='cuda:0')
14 tensor(0.8790, device='cuda:0')
15 tensor(0.8793, device='cuda:0')
16 tensor(0.8796, device='cuda:0')
17 tensor(0.8800, device='cuda:0')
18 tensor(0.8803, device='cuda:0')
19 tensor(0.8806, device='cuda:0')
20 tensor(0.8809, device='cuda:0')
21 tensor(0.8813, device='cuda:0')
22 tensor(0.8816, device='cuda:0')
23 tensor(0.8818, device='cuda:0')
24 tensor(0.8820, device='cuda:0')
25 tensor(0.8823, device='cuda:0')
26 t

Validating: 0it [00:00, ?it/s]

valid_acc_epoch tensor(0.7170, device='cuda:0')
train_acc_epoch tensor(0.8979, device='cuda:0')


In [None]:
# save/load model to/from file

#with open('/content/drive/MyDrive/text_similarity/bert.mdl', 'wb') as f:
#  pickle.dump(bert.to('cpu'), f)
#with open('/content/drive/MyDrive/text_similarity/bert.mdl', 'rb') as f:
#  bert = pickle.load(f)
#bert = bert.to(device)

In [None]:
i = 4
ids = torch.LongTensor(ds_test[i]['ids']).unsqueeze(0).to(device)
token_type_ids = torch.LongTensor(ds_test[i]['token_type_ids']).unsqueeze(0).to(device)
mask = torch.LongTensor(ds_test[i]['mask']).unsqueeze(0).to(device)
print(ds_test[i]['sent'])
with torch.no_grad():
  outputs = bert(ids, token_type_ids, mask)
print('Sentences are similar:', outputs.cpu().detach().squeeze(0).numpy()[0] > 0.5)
print('Confidence:', outputs.cpu().detach().squeeze(0).numpy()[0])
print('Target:', ds_test[i]['target'])

The company didn't detail the costs of the replacement and repairs.[SEP]But company officials expect the costs of the replacement work to run into the millions of dollars.


RuntimeError: ignored

In [None]:
# use your own sentences
s1 = '''The 2021 Japan Series is the ongoing championship series of Nippon Professional Baseball's postseason. The 72nd edition of the Japan Series, it is a best-of-seven playoff between the Orix Buffaloes and Tokyo Yakult Swallows, the winners of the Pacific and Central League's Climax Series, respectively. Both teams were the winners of their respective league's regular season championship.'''
s2 = '''A mid-season hiatus for the 2020 Summer Olympics and other game postponements required the Japan Series to be pushed back into late November, requiring some games to be played at stadiums other than the team's usual home fields due to scheduling conflicts. The series began on November 20, with a potential Game 7 scheduled for November 28. Yakult leads the series, 2–1.'''

inputs = TOKENIZER.encode_plus(s1, s2, add_special_tokens=True, max_length=None, padding='max_length')

ids = torch.LongTensor(inputs['input_ids']).unsqueeze(0).to(device)
token_type_ids = torch.LongTensor(inputs['token_type_ids']).unsqueeze(0).to(device)
mask = torch.LongTensor(inputs['attention_mask']).unsqueeze(0).to(device)
with torch.no_grad():
  outputs = bert(ids, token_type_ids, mask)
print('Sentences are similar:', outputs.cpu().detach().squeeze(0).numpy()[0] > 0.5)
print('Confidence:', outputs.cpu().detach().squeeze(0).numpy()[0])

In [None]:
s1 = 'Any trip to Italy should include a visit to Tuscany to sample the region\'s exquisite wines'
s2 = 'My name is Nikita.'

inputs = TOKENIZER.encode_plus(s1, s2, add_special_tokens=True, max_length=MAX_LEN, padding='max_length')

ids = torch.LongTensor(inputs['input_ids']).unsqueeze(0).to(device)
token_type_ids = torch.LongTensor(inputs['token_type_ids']).unsqueeze(0).to(device)
mask = torch.LongTensor(inputs['attention_mask']).unsqueeze(0).to(device)
outputs = bert(ids, token_type_ids, mask)
print('Sentences are similar:', outputs.cpu().detach().squeeze(0).numpy()[0] > 0.5)
print('Confidence:', outputs.cpu().detach().squeeze(0).numpy()[0])

In [None]:
'''for inputs in dl_train:
  break
ids = inputs['ids']
mask = inputs['mask']
token_type_ids = inputs['token_type_ids']
labels = inputs['target']

#bert = transformers.BertModel.from_pretrained('bert-base-uncased', config=model_config)
out, loss = bert(ids, token_type_ids, mask, labels)'''

In [None]:
'''class BERTPAWSDataset:
  def __init__(self, df_train):
    self.sent_1 = list(df_train['sentence1'].values)
    self.sent_2 = list(df_train['sentence2'].values)
    self.labels = list(df_train['label'].values)
    self.sent_len = len(self.sent_1)

  def __len__(self):
    return self.sent_len

  def __getitem__(self, idx):

    rand_num = np.random.uniform()

    s1 = " ".join(self.sent_1[idx].split()).lower()
    s2 = " ".join(self.sent_2[idx].split()).lower()
    target = torch.FloatTensor([self.labels[idx]])

    inputs = TOKENIZER.encode_plus(s1, s2, add_special_tokens=True, max_length=MAX_LEN, padding='max_length')

    ids = torch.LongTensor(inputs['input_ids'])
    token_type_ids = torch.LongTensor(inputs['token_type_ids'])
    mask = torch.LongTensor(inputs['attention_mask'])
    if len(ids) <= MAX_LEN:
      return {
          "ids": ids,
          "token_type_ids": token_type_ids,
          "mask": mask,
          "target": target,
          "sent": self.sent_1[idx] + '[SEP]' + self.sent_2[idx]
      }'''