In [36]:
!pip install transformers



In [37]:
!pip install datasets



In [38]:
!pip install colorama



In [39]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import sys
import torch
from datasets import load_dataset
from datasets import Dataset
import numpy as np
from torch.utils.data import RandomSampler, DataLoader, SequentialSampler, TensorDataset
from tqdm import tqdm
from transformers import BertTokenizer, BertForSequenceClassification
from colorama import Fore

import re

#Removed special symbols that were not captured by punctuation
def remove_special_symbols(text):
    text = re.sub('->', ' ', str(text))
    text = re.sub('“', ' ', str(text))
    text = re.sub('”', ' ', str(text))
    text = re.sub('\.',' ', str(text))
    return re.sub('’',' ', str(text))
  
def denoise_text(text):
    text = remove_special_symbols(text)
    return str(text)

#Convert labels to numbers
def convert_labels(text):
  if(text == 'neutral'):
    text = 0
  elif (text=='entailment'):
    text = 1
  elif (text=='contradiction'):
    text = 2
  return text

In [40]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [41]:
gpu = torch.device('cuda')
# ============================================= DOWNLOADING DATA =======================================================
epochs = 3
batch_size = 8
max_seq_length = 128

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

import pandas as pd
pd.options.mode.chained_assignment = None

df_train = pd.read_csv('gdrive/MyDrive/Colab Notebooks/2sat2var/small_10K/train_data_10K.csv', sep=None, engine="python")
df_eval = pd.read_csv('gdrive/MyDrive/Colab Notebooks/2sat2var/small_10K/eval_data_10K.csv', sep=None, engine="python")

print(len(df_train))
print(len(df_eval))

train_data = Dataset.from_pandas(df_train)
eval_data = Dataset.from_pandas(df_eval)

10000
1000


In [None]:
train_data[:10]

{'idx': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'is_inclusive': [1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
 'label': [1, 0, 1, 1, 0, 1, 1, 0, 1, 0],
 'premise': ['Rogers is snobby or Enos is not logical. Rogers is snobby.',
  'Rogers is snobby or Enos is not logical. Rogers is snobby.',
  'Rogers is snobby or Enos is not logical. Rogers is snobby.',
  'Rogers is snobby or Enos is not logical. Rogers is snobby.',
  'Quinten is not childlike or Mary is not cranky. Mary is cranky.',
  'Quinten is not childlike or Mary is not cranky. Mary is cranky.',
  'Quinten is not childlike or Mary is not cranky. Mary is cranky.',
  'Quinten is not childlike or Mary is not cranky. Mary is cranky.',
  'Either Claudette is not leery or Harvie is not musical. Claudette is leery.',
  'Either Claudette is not leery or Harvie is not musical. Claudette is leery.'],
 'question': ['Rogers is snobby',
  'Rogers is not snobby',
  'Enos is logical',
  'Enos is not logical',
  'Quinten is childlike',
  'Quinten is not childlike',


In [42]:
# ============================================= PREPARING DATASET ======================================================


def truncate_seq_pair(tokens_a, tokens_b, max_length):
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b): #trim the longest token
            tokens_a.pop()
        else:
            tokens_b.pop()


def convert_examples_to_features(examples, desc):
    p_bar = tqdm(total=len(examples), desc=desc,
                 position=0, leave=True,
                 file=sys.stdout, bar_format="{l_bar}%s{bar}%s{r_bar}" % (Fore.BLUE, Fore.RESET))
    labels = []
    input_word_ids = []
    input_type_ids = []
    input_masks = []
    for (index, example) in enumerate(examples):
        if "label" in example:
            labels.append(example["label"])
        example["premise"]=denoise_text(example["premise"])
        example["question"]=denoise_text(example["question"])
        tokens_a = tokenizer.tokenize(example["premise"])
        tokens_b = tokenizer.tokenize(example["question"])
        truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        tokens = []
        segment_ids = []
        #Inserting tokens for sentence1
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)
        #Inserting tokens for sentence2
        for token in tokens_b:
            tokens.append(token)
            segment_ids.append(1)
        tokens.append("[SEP]")
        segment_ids.append(1)
        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)
        #Padding the input_mask
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)
        input_word_ids.append(input_ids)
        input_type_ids.append(segment_ids)
        input_masks.append(input_mask)
        p_bar.update(1)
    p_bar.close()
    return [torch.tensor(input_word_ids, dtype=torch.int64),
            torch.tensor(input_masks, dtype=torch.float),
            torch.tensor(input_type_ids, dtype=torch.int64),
            torch.tensor(labels, dtype=torch.int64)]


In [43]:
train_data = TensorDataset(*convert_examples_to_features(train_data, "Creating training samples"))
train_sampler = RandomSampler(train_data)
train_data_loader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

eval_data = TensorDataset(*convert_examples_to_features(eval_data, "Creating evaluation samples"))
eval_sampler = SequentialSampler(eval_data)
validation_data_loader = DataLoader(eval_data, sampler=eval_sampler, batch_size=batch_size)

Creating training samples: 100%|[34m██████████[39m| 10000/10000 [00:08<00:00, 1218.49it/s]
Creating evaluation samples: 100%|[34m██████████[39m| 1000/1000 [00:00<00:00, 1047.27it/s]


In [None]:
train_data.tensors

(tensor([[  101,  7369,  2003,  ...,     0,     0,     0],
         [  101,  7369,  2003,  ...,     0,     0,     0],
         [  101,  7369,  2003,  ...,     0,     0,     0],
         ...,
         [  101,  2593, 14304,  ...,     0,     0,     0],
         [  101,  2593, 14304,  ...,     0,     0,     0],
         [  101,  2593, 14304,  ...,     0,     0,     0]]),
 tensor([[1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         ...,
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.]]),
 tensor([[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]),
 tensor([1, 0, 1,  ..., 0, 1, 1]))

In [44]:
# ================================================ TRAINING MODEL ======================================================
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2).to(device=gpu)

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]

optimizer = torch.optim.Adam(lr=1e-5, betas=(0.9, 0.98), eps=1e-9, params=optimizer_grouped_parameters)
# model.load_state_dict(torch.load("./weights_4.pth"))


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [46]:
epochs = 10
for epoch in range(1, epochs + 1):
    # ============================================ TRAINING ============================================================
    print("Training epoch ", str(epoch))
    training_pbar = tqdm(total=len(train_data),
                         position=0, leave=True,
                         file=sys.stdout, bar_format="{l_bar}%s{bar}%s{r_bar}" % (Fore.GREEN, Fore.RESET))
    model.train()
    tr_loss = 0
    nb_tr_steps = 0
    for step, batch in enumerate(train_data_loader):
        batch = tuple(t.to(gpu) for t in batch)
        input_word_ids, input_mask, input_type_ids, labels = batch
        optimizer.zero_grad() #clear gradient first
        loss, _ = model(input_ids=input_word_ids,
                        attention_mask=input_mask,
                        token_type_ids=input_type_ids,
                        labels=labels, return_dict=False)
        loss.backward()
        optimizer.step()
        tr_loss += loss.item()
        nb_tr_steps += 1
        training_pbar.update(input_word_ids.size(0))
    training_pbar.close()
    print(f"\nTraining loss={tr_loss / nb_tr_steps:.4f}")
    torch.save(model.state_dict(), "./small_weights_" + str(epoch) + ".pth")
    
    # ============================================ VALIDATION ==========================================================
    validation_pbar = tqdm(total=len(eval_data),
                           position=0, leave=True,
                           file=sys.stdout, bar_format="{l_bar}%s{bar}%s{r_bar}" % (Fore.BLUE, Fore.RESET))
    model.eval()
    eval_accuracy = 0
    nb_eval_steps = 0
    for batch in validation_data_loader:
        batch = tuple(t.to(gpu) for t in batch)
        input_word_ids, input_mask, input_type_ids, labels = batch
        with torch.no_grad():
            logits = model(input_ids=input_word_ids,
                           attention_mask=input_mask,
                           token_type_ids=input_type_ids, return_dict=False)

        logits = logits[0].detach().cpu().numpy()
        label_ids = labels.cpu().numpy()
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        eval_accuracy += np.sum(pred_flat == labels_flat) / len(labels_flat)
        nb_eval_steps += 1
        validation_pbar.update(input_word_ids.size(0))
    validation_pbar.close()
    print("Validation Accuracy: {}".format(eval_accuracy / nb_eval_steps))


Training epoch  1
  6%|[32m▌         [39m| 608/10000 [00:26<06:53, 22.72it/s]
100%|[32m██████████[39m| 10000/10000 [02:55<00:00, 56.97it/s]

Training loss=0.4680
100%|[34m██████████[39m| 1000/1000 [00:04<00:00, 226.83it/s]
Validation Accuracy: 0.919
Training epoch  2
100%|[32m██████████[39m| 10000/10000 [02:54<00:00, 57.23it/s]

Training loss=0.0855
100%|[34m██████████[39m| 1000/1000 [00:04<00:00, 225.61it/s]
Validation Accuracy: 0.996
Training epoch  3
100%|[32m██████████[39m| 10000/10000 [02:55<00:00, 57.04it/s]

Training loss=0.0232
100%|[34m██████████[39m| 1000/1000 [00:04<00:00, 226.57it/s]
Validation Accuracy: 0.996
Training epoch  4
100%|[32m██████████[39m| 10000/10000 [02:54<00:00, 57.19it/s]

Training loss=0.0136
100%|[34m██████████[39m| 1000/1000 [00:04<00:00, 225.79it/s]
Validation Accuracy: 0.996
Training epoch  5
100%|[32m██████████[39m| 10000/10000 [02:54<00:00, 57.30it/s]

Training loss=0.0108
100%|[34m██████████[39m| 1000/1000 [00:04<00:00, 228.10i

In [54]:
# ============================================ TESTING =================================================================
#model.load_state_dict(torch.load("gdrive/MyDrive/Colab Notebooks/2sat2var/weights_3.pth"))
model.eval()
test_data = [{
        'premise': 'Either Charlie is smart or Ron is lazy. Ron is not lazy',
        'question': 'Charlie is smart.'
    }, {
        'premise': 'Either I am going to the party or I am going to the gym. I am going to the gym.',
        'question': 'I am not going to the party.'
    }]
input_word_ids_test, input_masks_test, input_type_ids_test, _ = convert_examples_to_features(test_data, "Creating test samples")
result = model(input_ids=input_word_ids_test.to(gpu),
               attention_mask=input_masks_test.to(gpu),
               token_type_ids=input_type_ids_test.to(gpu), return_dict=False)
result = result[0].detach().cpu()
print(result.numpy())
result = torch.argmax(result, dim=1).numpy()
print(test_data[0])
print(test_data[1])
print(result)


Creating test samples: 100%|[34m██████████[39m| 2/2 [00:00<00:00, 1409.14it/s]
[[-6.113086   6.5828385]
 [ 3.4487603 -4.2878833]]
{'premise': 'Either Charlie is smart or Ron is lazy  Ron is not lazy', 'question': 'Charlie is smart '}
{'premise': 'Either I am going to the party or I am going to the gym  I am going to the gym ', 'question': 'I am not going to the party '}
[1 0]


In [55]:
# ============================================ TESTING =================================================================
#model.load_state_dict(torch.load("gdrive/MyDrive/Colab Notebooks/2sat2var/weights_3.pth"))
model.load_state_dict(torch.load("small_weights_9.pth"))

df_test = pd.read_csv('gdrive/MyDrive/Colab Notebooks/2sat2var/medium_100K/test_data_100K.csv', sep=None, engine="python")
test_data = Dataset.from_pandas(df_test)

test_data = TensorDataset(*convert_examples_to_features(test_data, "Creating test samples"))
test_sampler = SequentialSampler(test_data)
test_data_loader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

#print(len(test_data))

test_pbar = tqdm(total=len(test_data),
                           position=0, leave=True,
                           file=sys.stdout, bar_format="{l_bar}%s{bar}%s{r_bar}" % (Fore.RED, Fore.RESET))

model.eval()
test_accuracy = 0
nb_test_steps = 0
for batch in test_data_loader:
  batch = tuple(t.to(gpu) for t in batch)
  input_word_ids, input_mask, input_type_ids, labels = batch
  with torch.no_grad():
    result = model(input_ids=input_word_ids,
                           attention_mask=input_mask,
                           token_type_ids=input_type_ids, return_dict=False)

    result = result[0].detach().cpu().numpy()
    label_ids = labels.cpu().numpy()
    pred_flat = np.argmax(result, axis=1).flatten()
    labels_flat = label_ids.flatten()
    test_accuracy += np.sum(pred_flat == labels_flat) / len(labels_flat)
    nb_test_steps += 1
    test_pbar.update(input_word_ids.size(0))
test_pbar.close()
print("Test Accuracy: {}".format(test_accuracy / nb_test_steps))

Creating test samples: 100%|[34m██████████[39m| 10000/10000 [00:09<00:00, 1107.94it/s]
100%|[31m██████████[39m| 10000/10000 [00:44<00:00, 224.27it/s]
Test Accuracy: 0.9982
