# Adding Knowledge to Language Models
Pedro Augusto Vicentini Fracarolli (p191535@g.unicamp.br)

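In this notebook we try to add new knowledge to a pretrained `t5-base` model. The model is first fine-tuned on the `nq_open` question-answering dataset; we then take a small sample of the test questions it answers incorrectly and compare several ways of teaching it those facts: plain fine-tuning on the question/answer pairs, masked sentences derived from the questions, masked question–answer concatenations, and questions with a masked answer.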

In [None]:
!rm -rf lightning_logs/
!pip install --quiet transformers datasets pytorch-lightning sentencepiece

# installs adafactor
!git clone https://github.com/DeadAt0m/adafactor-pytorch.git adafactor_pytorch

# turns the repo into a python module
!echo '' >> adafactor_pytorch/__init__.py

[K     |████████████████████████████████| 3.1 MB 8.9 MB/s 
[K     |████████████████████████████████| 298 kB 69.6 MB/s 
[K     |████████████████████████████████| 524 kB 65.3 MB/s 
[K     |████████████████████████████████| 1.2 MB 43.9 MB/s 
[K     |████████████████████████████████| 895 kB 57.8 MB/s 
[K     |████████████████████████████████| 596 kB 62.7 MB/s 
[K     |████████████████████████████████| 3.3 MB 56.8 MB/s 
[K     |████████████████████████████████| 61 kB 584 kB/s 
[K     |████████████████████████████████| 243 kB 82.7 MB/s 
[K     |████████████████████████████████| 1.1 MB 55.1 MB/s 
[K     |████████████████████████████████| 132 kB 75.5 MB/s 
[K     |████████████████████████████████| 829 kB 62.1 MB/s 
[K     |████████████████████████████████| 332 kB 75.6 MB/s 
[K     |████████████████████████████████| 192 kB 87.3 MB/s 
[K     |████████████████████████████████| 271 kB 81.5 MB/s 
[K     |████████████████████████████████| 160 kB 87.3 MB/s 
[?25h  Building wheel for 

In [None]:
# Mount Google Drive inside the Colab VM. The checkpoint paths used further down
# ('drive/MyDrive/...') are relative, so they presumably resolve under this mount
# point only when the working directory is /content - TODO confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import nvidia_smi

# Report which GPU this session was given (device index 0).
nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
# The name is printed exactly as NVML returns it; in the recorded output that is
# a bytes object (b'...').
print(f"Device name: {nvidia_smi.nvmlDeviceGetName(handle)}")

Device name: b'Tesla P100-PCIE-16GB'


In [None]:
from datasets import load_dataset

# Download (or reuse the cached copy of) the NQ Open dataset. The rest of the
# notebook reads its 'train' and 'validation' splits.
ds = load_dataset('nq_open')

Downloading:   0%|          | 0.00/2.85k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.31k [00:00<?, ?B/s]

Downloading and preparing dataset nq_open/nq_open (download: 8.50 MiB, generated: 6.64 MiB, post-processed: Unknown size, total: 15.14 MiB) to /root/.cache/huggingface/datasets/nq_open/nq_open/2.0.0/75b7e191dc38a0f99f451a2cc0dc969fee2965238051d6f03989ff66ea1f39a5...


  0%|          | 0/2 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/126k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.61M [00:00<?, ?B/s]

  0%|          | 0/2 [00:00<?, ?it/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset nq_open downloaded and prepared to /root/.cache/huggingface/datasets/nq_open/nq_open/2.0.0/75b7e191dc38a0f99f451a2cc0dc969fee2965238051d6f03989ff66ea1f39a5. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Hyper-parameters shared by the cells below.
hparams = {
    # checkpoint name passed to T5ForConditionalGeneration.from_pretrained
    'model_name': 't5-base',
    # checkpoint name passed to AutoTokenizer.from_pretrained
    'tokenizer_name': 't5-base',
    # forwarded to pl.Trainer(accumulate_grad_batches=...)
    'accumulate_grad_batches': 2,
    # learning rate handed to the optimizer
    'lr': 1e-3,
    # fraction of the 'train' split held out for validation
    'validation_amt': 0.1,
    # tokenizer truncation length (applied to questions and answers alike)
    'max_length': 512,
    # examples per DataLoader batch
    'batch_size': 64,
    # forwarded to pl.Trainer(max_epochs=...)
    'max_epochs': 250
}

In [None]:
import random

random.seed(1234)

def train_validation_split(dataset, validation_amt=0.1):
    """Shuffle ``dataset`` and cut it into a training and a validation part.

    The input is copied into a list first, so the caller's object is not
    reordered. Shuffling uses the module-level ``random`` generator, so the
    result depends on (and advances) the global random state.

    Args:
        dataset: iterable of examples.
        validation_amt: fraction of the examples to put in the validation part.

    Returns:
        A ``(train, validation)`` pair of lists.
    """
    examples = [*dataset]
    n_train = int(len(examples) * (1 - validation_amt))
    random.shuffle(examples)
    return examples[:n_train], examples[n_train:]

def test_train_validation_split():
    """Sanity check: the two parts together contain every training example."""
    full_train = ds['train']
    train_part, validation_part = train_validation_split(full_train)
    assert len(full_train) == len(train_part) + len(validation_part)

# Note: this call shuffles with the global `random` generator, so it advances
# the seeded state that later splits draw from.
test_train_validation_split()

In [None]:
from transformers import AutoTokenizer

class Tokenizer:
    """Thin wrapper around a Hugging Face tokenizer with fixed encoding options.

    Every call pads to the longest item in the batch, truncates to
    ``max_length`` and asks for PyTorch tensors.
    """

    def __init__(self, tokenizer_name: str, max_length=512):
        """Load the pretrained tokenizer ``tokenizer_name`` and remember ``max_length``."""
        self._max_length = max_length
        self._tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def __call__(self, text: str):
        """Encode ``text`` and return whatever the wrapped tokenizer returns.

        ``Collater`` also calls this with a list of strings, which is passed
        through to the wrapped tokenizer unchanged.
        """
        encode_options = {
            'max_length': self._max_length,
            'padding': 'longest',
            'return_tensors': 'pt',
            'truncation': True,
        }
        return self._tokenizer(text, **encode_options)

    def decode(self, *args, **kwargs):
        """Forward all arguments to the wrapped tokenizer's ``decode``."""
        return self._tokenizer.decode(*args, **kwargs)

    @property
    def pad_token_id(self):
        """Padding token id of the wrapped tokenizer."""
        return self._tokenizer.pad_token_id

def test_tokenizer():
    """Round-trip check: encoding then decoding a plain sentence returns it unchanged."""
    sample = 'this is some text'
    wrapper = Tokenizer(hparams['tokenizer_name'])

    token_ids = wrapper(sample).input_ids.squeeze(0)
    round_trip = wrapper.decode(token_ids, skip_special_tokens=True)

    assert round_trip == sample

test_tokenizer()

Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

In [None]:
from torch.utils.data import Dataset

class NQOpenDataset(Dataset):
    """Map-style dataset exposing records as ``(question, answers)`` pairs.

    ``dataset`` is any indexable collection of mappings that have a
    ``'question'`` and an ``'answer'`` key.
    """

    def __init__(self, dataset):
        super().__init__()
        self._dataset = dataset

    def __len__(self):
        """Number of records in the wrapped collection."""
        return len(self._dataset)

    def __getitem__(self, idx):
        """Return ``(question, answer)`` for the record at ``idx``."""
        record = self._dataset[idx]
        return record['question'], record['answer']

def test_nq_open_dataset():
    """Check that answers come back as a list and show the first example."""
    wrapped = NQOpenDataset(ds['train'])
    _, answers = wrapped[0]

    assert isinstance(answers, list)
    print(wrapped[0])

test_nq_open_dataset()

('where did they film hot tub time machine', ['Fernie Alpine Resort'])


In [None]:
class MaskedAnswerDataset(Dataset):
    """Question/answer pairs rewritten as a T5 span-filling task.

    Each record ``{'question': q, 'answer': [a, ...]}`` becomes the input
    ``"q <extra_id_N>"`` and the single-element target list
    ``["<extra_id_N> a <extra_id_N+1>"]``, where ``N`` is ``init_sentinel`` and
    ``a`` is the first listed answer.
    """

    def __init__(self, data, init_sentinel=0):
        """Build all inputs and targets up front.

        Args:
            data: iterable of mappings with ``'question'`` and ``'answer'``
                keys; ``'answer'`` must be a non-empty sequence.
            init_sentinel: index of the sentinel token that stands in for the
                answer.
        """
        self.x, self.y = self._mask(data, init_sentinel)

    def _mask(self, data, init_sentinel):
        """Return ``(inputs, targets)`` as two tuples of equal length.

        Building the two sequences explicitly (instead of unpacking
        ``zip(*pairs)``) keeps this working for empty ``data``, where the
        unpacking would raise ``ValueError``.
        """
        # Both sentinels are the same for every example, so build them once.
        open_tag = f"<extra_id_{init_sentinel}>"
        close_tag = f"<extra_id_{init_sentinel + 1}>"

        inputs = []
        targets = []
        for d in data:
            inputs.append(f"{d['question']} {open_tag}")
            targets.append([f"{open_tag} {d['answer'][0]} {close_tag}"])

        return tuple(inputs), tuple(targets)

    def __getitem__(self, idx):
        """Return ``(input_text, [target_text])`` for example ``idx``."""
        x = self.x[idx]
        y = self.y[idx]

        return x, y

    def __len__(self):
        """Number of examples."""
        return len(self.y)

def test_masked_answer_dataset():
    """Print two masked examples and the dataset size for eyeballing."""
    masked = MaskedAnswerDataset(ds['train'], init_sentinel=0)

    for shown in (masked[0], masked[10], len(masked)):
        print(shown)

test_masked_answer_dataset()

('where did they film hot tub time machine <extra_id_0>', ['<extra_id_0> Fernie Alpine Resort <extra_id_1>'])
('where does it talk about mary magdalene in the bible <extra_id_0>', ['<extra_id_0> New Testament <extra_id_1>'])
87925


In [None]:
# Carve a validation set out of the 'train' split. The shuffle uses the global
# `random` generator, so the result depends on the seed set earlier and on how
# many shuffles have already run.
train_data, validation_data = train_validation_split(
    ds['train'], 
    hparams['validation_amt']
)

# The dataset's own 'validation' split is used as the held-out test set.
test_data = ds['validation']

print(f'# of training examples: {len(train_data)}')
print(f'# of validation examples: {len(validation_data)}')
print(f'# of test examples: {len(test_data)}')

# of training examples: 79132
# of validation examples: 8793
# of test examples: 3610


In [None]:
class Collater:
    """Collate function turning ``(question, answers)`` pairs into model inputs."""

    def __init__(self, tokenizer: Tokenizer):
        self._tokenizer = tokenizer

    def __call__(self, data):
        """Tokenize a batch.

        Args:
            data: sequence of ``(question, answers)`` pairs, where ``answers``
                is a non-empty list of strings.

        Returns:
            ``(input_ids, attention_mask, labels, questions, answers)``.
            ``labels`` holds the token ids of the *first* answer of each
            example with padding positions set to -100; ``questions`` and
            ``answers`` are the untouched tuples of raw values.
        """
        questions, answers = zip(*data)

        question_texts = list(questions)
        # Only the first listed answer is used as the training target.
        target_texts = [candidates[0] for candidates in answers]

        encoded_questions = self._tokenizer(question_texts)

        labels = self._tokenizer(target_texts).input_ids
        is_padding = labels == self._tokenizer.pad_token_id
        labels[is_padding] = -100

        return (
            encoded_questions.input_ids,
            encoded_questions.attention_mask,
            labels,
            questions,
            answers,
        )
    

In [None]:
import torch
import pytorch_lightning as pl

from typing import Callable, Optional
from torch.utils.data import DataLoader

class FineTuningDataModule(pl.LightningDataModule):
    """Data module serving three fixed datasets through identically configured loaders.

    Only the training loader may shuffle (controlled by
    ``use_shuffle_for_train``); the validation and test loaders never do.
    """

    def __init__(
        self, 
        train_ds, 
        validation_ds, 
        test_ds,
        batch_size: int, 
        num_workers: int = 0,
        use_shuffle_for_train: bool = False,
        collate_fn: Optional[Callable] = None
    ):
        """Store the datasets and the DataLoader settings shared by all splits."""
        super().__init__()

        self.train_ds, self.validation_ds, self.test_ds = train_ds, validation_ds, test_ds
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.use_shuffle_for_train = use_shuffle_for_train
        self.collate_fn = collate_fn

    def _build_loader(self, dataset, shuffle):
        """Create a DataLoader for ``dataset`` with the shared settings."""
        return DataLoader(
            dataset,
            shuffle=shuffle,
            batch_size=self.batch_size,
            collate_fn=self.collate_fn,
            num_workers=self.num_workers
        )

    def train_dataloader(self):
        """Loader over the training set."""
        return self._build_loader(self.train_ds, self.use_shuffle_for_train)

    def val_dataloader(self):
        """Loader over the validation set (never shuffled)."""
        return self._build_loader(self.validation_ds, False)

    def test_dataloader(self):
        """Loader over the test set (never shuffled)."""
        return self._build_loader(self.test_ds, False)

# Seed PyTorch's global generator.
torch.manual_seed(1234)

# Module-level objects reused by the cells below.
batch_size = hparams['batch_size']
tokenizer = Tokenizer(
    hparams['tokenizer_name'], 
    max_length=hparams['max_length']
)

collate_fn = Collater(tokenizer)

# Wrap the three splits so that each item is a (question, answers) pair.
train_ds = NQOpenDataset(train_data)
validation_ds = NQOpenDataset(validation_data)
test_ds = NQOpenDataset(test_data)

def test_dataloaders():
    """Smoke-test the data module: batch layout, batch counts and sample answers."""
    finetuning_dm = FineTuningDataModule(
        train_ds=train_ds,
        validation_ds=validation_ds,
        test_ds=test_ds,
        batch_size=hparams['batch_size'],
        collate_fn=collate_fn
    )

    train_dl = finetuning_dm.train_dataloader()
    validation_dl = finetuning_dm.val_dataloader()
    test_dl = finetuning_dm.test_dataloader()

    # A batch must unpack into exactly these five fields, each holding
    # `batch_size` entries.
    x, attention_mask, y, question, answer = next(iter(train_dl))
    for field in (x, attention_mask, y, question, answer):
        assert len(field) == batch_size

    for split_name, loader in (
        ('training', train_dl),
        ('validation', validation_dl),
        ('test', test_dl),
    ):
        print(f'# of {split_name} batches: {len(loader)}')

    # Index 4 of a batch is the tuple of raw answer lists.
    print(next(iter(train_dl))[4])
    print(next(iter(validation_dl))[4])

test_dataloaders()

# of training batches: 1237
# of validation batches: 138
# of test batches: 57
(['East Asia'], ['Bill Withers', 'Grover Washington Jr.'], ['Ernie'], ['eastern and southern Africa'], ['Wakanda'], ['Barry Bonds'], ['the duodenum'], ['August\xa04,\xa02000'], ['7 (per side)'], ['Norway'], ['Barry Gibb', 'Robin Gibb'], ['Columbia'], ['species'], ['Dick Clark'], ['1980'], ['William F. Galvin'], ['14'], ['Eslaah-Talabaan', 'reformists'], ['Maya Ying Lin'], ['2018'], ['2015'], ['Aloe Blacc'], ['February 20, 2017'], ['cervical dilatation'], ['Rocket propellant'], ['George II'], ['Montenegro'], ['Aditya Puri'], ['Bryce', 'Diggy', 'Brady'], ['Alan Silvestri'], ['Pete Seeger'], ['South Africa'], ['Ascona (Switzerland)'], ['thirteen'], ['ALASKA'], ['Humpty Alexander Dumpty'], ['Thomas Bjørn'], ['Eden Sher'], ['Trevor Nunn'], ['Patriots', 'Loyalists'], ['Tinker Bell', 'Snow White', 'Mulan', 'Pocahontas', 'Aurora', 'Ariel', 'Belle', 'Cinderella', 'Jasmine'], ['Nursing and Midwifery Accreditation Coun

In [None]:
import re
import pytorch_lightning as pl

from torch.optim import Adam, Optimizer
from adafactor_pytorch.adafactor import AdaFactor
from transformers import T5ForConditionalGeneration

EXTRA_ID_REGEX = re.compile(r'<extra_id_[0-9]+>')

class Model(pl.LightningModule):
    """T5 question-answering module scored by exact-match accuracy.

    In training mode ``forward`` runs the teacher-forced pass and returns the
    model output (with ``.loss``); otherwise it generates answer token ids.
    During evaluation a prediction counts as correct when its decoded text,
    with sentinel tokens removed and whitespace stripped, equals one of the
    reference answers.

    Attributes:
        correct_qa / incorrect_qa: ``{'question', 'answer'}`` dicts collected
            during test steps. They are only initialised in ``__init__`` and
            keep growing across successive test runs.
    """

    def __init__(
        self, 
        model_name: str, 
        tokenizer: Tokenizer, 
        lr: float,
        verbose: bool = False,
        optimizer_class = Adam,
        num_beams=1
    ):
        """
        Args:
            model_name: checkpoint name for ``T5ForConditionalGeneration.from_pretrained``.
            tokenizer: used to decode generated ids during evaluation.
            lr: learning rate handed to the optimizer.
            verbose: when true, print one question/answer/prediction per evaluation batch.
            optimizer_class: callable invoked as ``optimizer_class(params, lr=lr)``.
            num_beams: beam width used for generation.
        """
        super().__init__()
        self.t5 = T5ForConditionalGeneration.from_pretrained(model_name)
        self.lr=lr
        self.verbose = verbose
        self.tokenizer = tokenizer
        self.correct_qa = []
        self.incorrect_qa = []
        self.optimizer_class = optimizer_class
        self.num_beams = num_beams

    def configure_optimizers(self):
        """Instantiate the configured optimizer over all T5 parameters."""
        return self.optimizer_class(self.t5.parameters(), lr=self.lr)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Return the training output in training mode, generated ids otherwise."""
        if self.training:
            return self.t5(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

        # Hand the padding mask to generate() explicitly when we have one,
        # rather than relying on it being inferred from the input ids.
        generate_kwargs = {'num_beams': self.num_beams}
        if attention_mask is not None:
            generate_kwargs['attention_mask'] = attention_mask

        return self.t5.generate(input_ids, **generate_kwargs)

    def training_step(self, batch, batch_idx):
        """Compute the loss for one batch."""
        input_ids, attention_mask, labels, _, _ = batch
        loss = self.forward(input_ids, attention_mask, labels).loss
        return {'loss': loss}

    def _shared_eval_step(self, batch, save_results=False):
        """Generate answers for ``batch`` and return its exact-match accuracy.

        When ``save_results`` is true every example is appended to
        ``correct_qa`` or ``incorrect_qa``.
        """
        input_ids, attention_mask, _, questions, answers = batch
        output = self.forward(input_ids, attention_mask)

        acc = 0
        has_printed_already = False

        for predict, question, true_labels in zip(output, questions, answers):
            true_labels = [EXTRA_ID_REGEX.sub('', label).strip() for label in true_labels]
            decoded = self.tokenizer.decode(predict, skip_special_tokens=True)
            decoded = EXTRA_ID_REGEX.sub('', decoded).strip()

            correct = decoded in true_labels
            acc += int(correct)

            if save_results:
                qa = {'question': question, 'answer': true_labels}

                if correct:
                    self.correct_qa.append(qa)
                else:
                    self.incorrect_qa.append(qa)

            # Only the first example of each batch is printed.
            if self.verbose and not has_printed_already:
                print(f"Q:{question}\nA:{'; '.join(true_labels)}\nP:{decoded}\n")
                has_printed_already = True

        return acc / len(questions)

    @staticmethod
    def _epoch_accuracy(outputs, key):
        """Average per-batch accuracies weighted by batch size.

        A plain mean of batch accuracies over-weights a smaller final batch;
        weighting by size gives the fraction of correct examples overall.
        Entries without a ``'batch_size'`` count with weight 1, and an empty
        ``outputs`` yields 0.0 instead of dividing by zero.
        """
        total = sum(output.get('batch_size', 1) for output in outputs)
        if total == 0:
            return 0.0

        weighted = sum(output[key] * output.get('batch_size', 1) for output in outputs)
        return weighted / total

    def validation_step(self, batch, batch_idx):
        """Return the batch accuracy together with the batch size."""
        val_acc = self._shared_eval_step(batch)
        # batch[3] is the tuple of raw questions, one per example.
        return {'val_acc': val_acc, 'batch_size': len(batch[3])}

    def validation_epoch_end(self, outputs):
        """Log the validation accuracy of the whole epoch as ``epoch_val_acc``."""
        epoch_acc = self._epoch_accuracy(outputs, 'val_acc')
        self.log('epoch_val_acc', epoch_acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Return the batch accuracy and size, recording every example."""
        test_acc = self._shared_eval_step(batch, save_results=True)
        return {'test_acc': test_acc, 'batch_size': len(batch[3])}

    def test_epoch_end(self, outputs):
        """Log the test accuracy of the whole epoch as ``epoch_test_acc``."""
        epoch_acc = self._epoch_accuracy(outputs, 'test_acc')
        self.log('epoch_test_acc', epoch_acc, prog_bar=True)
        

def test_model():
    """Overfit a small slice of the training data to check the training loop end to end.

    The same training data is used for fitting, validation and testing.
    """
    debug_model = Model(
        hparams['model_name'],
        tokenizer,
        lr=hparams['lr'],
        verbose=True,
        optimizer_class=Adam
    )

    fit_ds = NQOpenDataset(train_data)
    eval_ds = NQOpenDataset(train_data)
    debug_dm = FineTuningDataModule(
        train_ds=fit_ds,
        validation_ds=eval_ds,
        test_ds=eval_ds,
        batch_size=128,
        use_shuffle_for_train=False,
        collate_fn=collate_fn,
        num_workers=4
    )

    debug_trainer = pl.Trainer(
        gpus=1,
        overfit_batches=0.05,
        enable_checkpointing=False,
        max_epochs=30,
        check_val_every_n_epoch=10
    )

    debug_trainer.fit(debug_model, debug_dm)
    debug_trainer.test(debug_model, debug_dm)

# uncomment to "unit test" the model
# test_model()

In [None]:
import json
from pytorch_lightning.callbacks import ModelCheckpoint

# Directory (relative to the working directory) where ModelCheckpoint writes.
checkpoint_dirpath = 'drive/MyDrive/checkpoints'

# File name template filled in by ModelCheckpoint with the epoch number and the
# logged 'epoch_val_acc' metric.
checkpoint_filename = 'checkpoint_{epoch}_{epoch_val_acc:.2f}'
# Checkpoint that the evaluation and the later experiments restore from.
chkpt_path = 'drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt'

def save_answers(model):
    """Dump the model's recorded test results to two JSON files.

    ``model.correct_qa`` goes to ``answers_correct.json`` and
    ``model.incorrect_qa`` to ``answers_incorrect.json``, both in the current
    working directory; existing files are overwritten.
    """
    outputs = (
        ('answers_correct.json', model.correct_qa),
        ('answers_incorrect.json', model.incorrect_qa),
    )

    for filename, records in outputs:
        with open(filename, 'w') as handle:
            json.dump(records, handle)

# The model that every later cell evaluates or continues training.
model = Model(
    hparams['model_name'], 
    tokenizer, 
    lr=hparams['lr'],
    optimizer_class=Adam
)

# Checkpoints are ranked by 'epoch_val_acc' (logged in validation_epoch_end),
# higher being better.
model_checkpoint = ModelCheckpoint(
    dirpath=checkpoint_dirpath,
    filename=checkpoint_filename,
    mode='max',
    monitor='epoch_val_acc',
    auto_insert_metric_name=False,
    every_n_epochs=10,
    save_on_train_epoch_end=False
)

trainer = pl.Trainer(
    gpus=1,
    max_epochs=hparams['max_epochs'],
    check_val_every_n_epoch=5,
    callbacks=[model_checkpoint],
    accumulate_grad_batches=hparams['accumulate_grad_batches']
)

datamodule = FineTuningDataModule(
    train_ds=train_ds,
    validation_ds=validation_ds,
    test_ds=test_ds,
    collate_fn=collate_fn,
    batch_size=hparams['batch_size'],
    num_workers=4,
)

# uncomment to run the training
# trainer.fit(model, datamodule, ckpt_path=chkpt_path)

# Evaluate the stored checkpoint on the test set. The test steps record every
# question as correctly / incorrectly answered on the model, and save_answers
# writes those two lists to JSON for the cells below.
trainer.test(model, datamodule, ckpt_path=chkpt_path)
save_answers(model)

Downloading:   0%|          | 0.00/850M [00:00<?, ?B/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.22828103601932526}
--------------------------------------------------------------------------------


In [None]:
def load_questions_from_json(filename):
    """Parse the JSON document stored in ``filename`` and return the result."""
    with open(filename) as source:
        return json.load(source)

def verify_model_with_correct_questions(model):
    """Re-test ``model`` on the questions saved in ``answers_correct.json``.

    Serves as a consistency check of the saved file: the recorded run reports
    an accuracy of 1.0 on this set.
    """
    answered_correctly = NQOpenDataset(
        load_questions_from_json('answers_correct.json')
    )

    # The same dataset fills all three slots; only the test loader is used.
    check_dm = FineTuningDataModule(
        train_ds=answered_correctly,
        validation_ds=answered_correctly,
        test_ds=answered_correctly,
        batch_size=hparams['batch_size'],
        collate_fn=collate_fn
    )

    pl.Trainer(gpus=1).test(model, check_dm)

verify_model_with_correct_questions(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 1.0}
--------------------------------------------------------------------------------


In [None]:
# Questions the checkpointed model got wrong on the test set; the source of the
# "new knowledge" examples built below.
incorrect_answers = load_questions_from_json('answers_incorrect.json')
print(f'# of incorrect answers {len(incorrect_answers)}')

# of incorrect answers 2786


In [None]:
import spacy

# Re-seed the global `random` generator so the sampling below is repeatable.
random.seed(1234)
# spaCy pipeline used for the part-of-speech checks in
# filter_and_convert_to_sentences.
nlp = spacy.load("en_core_web_sm")

def filter_and_convert_to_sentences(questions, max_samples: int = None):
    """Turn suitable question/answer records into declarative sentences.

    Only records with exactly one answer are considered, and only three
    question shapes are converted:

    * ``who is <X> about``                                      -> ``<X> is about <answer>``
    * ``who is <rest>`` whose answer ends in a proper noun      -> ``<answer> is <rest>``
    * ``when was <X> <verb>`` whose answer is numeric           -> ``<X> was <verb> in <answer>``

    Everything else is dropped. The surviving items are shuffled with the
    global ``random`` generator and cut to ``max_samples``.

    Args:
        questions: iterable of mappings with a ``'question'`` string and an
            ``'answer'`` list.
        max_samples: maximum number of items to return; ``None`` means no limit.

    Returns:
        A list of ``[sentence, question, answer]`` triples.
    """
    filtered = []

    for question in questions:
        q = question['question']
        answers = question['answer']

        # Ambiguous (several answers) and unanswered (empty list) records are
        # skipped; indexing an empty list would raise IndexError.
        if len(answers) != 1:
            continue

        a = answers[0]
        sentence = None

        if q.startswith('who is ') and q.endswith(' about'):
            l = len('who is ')
            sentence = f"{q[l:q.rfind(' about')]} is about {a}"

        # `a` must be non-empty before parsing: an empty string parses to a
        # document without tokens, and taking its last token would fail.
        elif q.startswith('who is ') and a and nlp(a)[-1].pos_ == 'PROPN':
            # Replace the leading 'who' with the answer, keeping ' is ...'.
            l = len('who')
            sentence = f'{a}{q[l:]}'

        elif q.startswith('when was ') and a.isnumeric():
            parsed = nlp(q)

            if parsed[-1].pos_ == 'VERB':
                # Move the trailing verb behind 'was' and append the answer.
                last_word_idx = q.rfind(parsed[-1].text)
                sentence = f"{q[len('when was '):last_word_idx]}was {parsed[-1].text} in {a}"

        if sentence is not None:
            filtered.append([sentence, q, a])

    random.shuffle(filtered)
    # Slicing already stops at the end of the list, and a stop of None keeps
    # everything, so no explicit clamping is needed.
    return filtered[:max_samples]

# Sample at most 32 convertible questions from the ones the model got wrong.
pretraining_data = filter_and_convert_to_sentences(incorrect_answers, 32)

print('# of pretraining examples', len(pretraining_data))
# Split the [sentence, question, answer] triples into three parallel tuples.
pt_sentences, pt_questions, pt_answers = zip(*pretraining_data)
print('\n'.join(pt_sentences))

# of pretraining examples 32
Felix Baumgartner is the guy that jumped from space
TBD is new prime minister of italy 2018
James Chadwick is credited with the discovery of the neutron
Dhrubajyoti Bora is the present president of assam sahitya sabha
Alicia Vikander as Lara Croft is the cast of the new tomb raider movie
Jennifer Holliday is the original singer of you re going to love me
Edgar Lungu is the president of the republic of zambia
Giovanni Ribisi is the actor that plays sneaky pete
Catherine Tramell is the real killer in basic instinct 2
the bridge over the hoover dam was built in 2010
John Quincy Adams is the sixth president of the united states
Andrew Collins is jared on the bold and the beautiful
W. Edwards Deming is the originator of the plan-do-check-act model of performance improvement
Rafael Nadal is the no. 1 ranked tennis player in the world
Jonathan Cheban is the guy on keeping up with the kardashians
Jesse McCartney is jojo in horton hears a who
James Madison is pictur

In [None]:
import numpy as np

np.random.seed(1234)

class MaskedDataset(Dataset):
    """Sentences with one randomly chosen word replaced by ``<extra_id_0>``.

    Item ``i`` is ``(masked_sentence, ["<extra_id_0> word <extra_id_1>"])``.
    Word positions are drawn from NumPy's global random generator, so seed it
    for repeatable masks.
    """

    def __init__(self, sentences):
        """Mask every sentence of ``sentences`` (an iterable of strings) once, up front."""
        self.x, self.y = self._mask_random(sentences)

    def _mask_random(self, sentences):
        """Return ``(masked_sentences, targets)`` as two lists.

        Words are the pieces obtained by splitting on single spaces. Exactly
        one word *position* is masked; other occurrences of the same text,
        including occurrences inside longer words, are left alone.
        """
        xs = []
        ys = []

        for sentence in sentences:
            words = sentence.split(' ')

            if len(words) > 1:
                # Two distinct positions are drawn and the smaller one is
                # masked. Only one is needed, but drawing two keeps the random
                # stream - and therefore the masks of a seeded run - the same
                # as before.
                picked = np.random.choice(
                    np.arange(len(words)),
                    2,
                    replace=False
                )
                start0 = int(min(picked))
            else:
                # A one-word sentence cannot provide two distinct positions.
                start0 = 0

            part0 = words[start0]

            # Rebuild the sentence around the chosen position instead of using
            # str.replace, which would substitute every matching substring.
            masked_words = words[:start0] + ['<extra_id_0>'] + words[start0 + 1:]

            xs.append(' '.join(masked_words))
            ys.append([f"<extra_id_0> {part0} <extra_id_1>"])

        return xs, ys

    def __getitem__(self, idx):
        """Return ``(masked_sentence, [target])`` for example ``idx``."""
        x = self.x[idx]
        y = self.y[idx]

        return x, y

    def __len__(self):
        """Number of examples."""
        return len(self.y)

def test_masked_dataset():
    """Print a few masked sentences for eyeballing."""
    masked = MaskedDataset(pt_sentences)

    for position in (2, 1, 19):
        print(masked[position])

# Note: building the dataset draws from NumPy's global generator, so this call
# advances the seeded state used by later MaskedDataset constructions.
test_masked_dataset()

('James Chadwick is <extra_id_0> with the discovery of the neutron', ['<extra_id_0> credited <extra_id_1>'])
('TBD is new prime <extra_id_0> of italy 2018', ['<extra_id_0> minister <extra_id_1>'])
('Merle Dixon is darrell <extra_id_0> in the walking dead', ['<extra_id_0> brother <extra_id_1>'])


In [None]:
# The sampled questions in the same record shape as the NQ Open data.
sampled_questions_and_answers = [
    {'question': q, 'answer': [a]} 
    for q, a in zip(pt_questions, pt_answers)
]

# "question answer" strings, used below as sentences to be masked.
concat_samples = [
    f"{q['question']} {q['answer'][0]}" 
    for q in sampled_questions_and_answers 
]

correct_answers = load_questions_from_json('answers_correct.json')

# Evaluation sets.
ds_questions_incorrect = NQOpenDataset(incorrect_answers)
ds_questions_correct = NQOpenDataset(correct_answers)
ds_questions_incorrect_sample = NQOpenDataset(sampled_questions_and_answers)

# Training sets for the alternative strategies. The two MaskedDataset
# constructions draw from NumPy's global generator, so their order matters for
# reproducibility.
ds_sentences_incorrect_sample_masked = MaskedDataset(pt_sentences)
ds_concat = MaskedDataset(concat_samples)
ds_masked_answer = MaskedAnswerDataset(sampled_questions_and_answers)

torch.manual_seed(1234)

# One data module per strategy. Each uses the whole training set as a single
# batch, validates on the sampled questions and starts with the incorrectly
# answered questions as its test set.

# Strategy: masked declarative sentences.
dm_masked_sentences = FineTuningDataModule(
    train_ds=ds_sentences_incorrect_sample_masked,
    validation_ds=ds_questions_incorrect_sample,
    test_ds=ds_questions_incorrect,
    collate_fn=collate_fn,
    batch_size=len(ds_sentences_incorrect_sample_masked)
)

# Strategy: plain fine-tuning on the sampled question/answer pairs.
dm_regular = FineTuningDataModule(
    train_ds=ds_questions_incorrect_sample,
    validation_ds=ds_questions_incorrect_sample,
    test_ds=ds_questions_incorrect,
    collate_fn=collate_fn,
    batch_size=len(ds_questions_incorrect_sample)
)

# Strategy: masked "question answer" concatenations.
dm_concat = FineTuningDataModule(
    train_ds=ds_concat,
    validation_ds=ds_questions_incorrect_sample,
    test_ds=ds_questions_incorrect,
    collate_fn=collate_fn,
    batch_size=len(ds_concat)
)

# Strategy: question followed by a sentinel, with the answer as the target.
dm_masked_answers = FineTuningDataModule(
    train_ds=ds_masked_answer,
    validation_ds=ds_questions_incorrect_sample,
    test_ds=ds_questions_incorrect,
    collate_fn=collate_fn,
    batch_size=len(ds_masked_answer)
)

# (label, dataset) pairs that fit_and_test evaluates after each experiment.
datasets = [
    ('incorrect_questions', ds_questions_incorrect), 
    ('correct_questions', ds_questions_correct), 
    ('incorrect_sampled_set', ds_questions_incorrect_sample), 
    ('full_test_set', test_ds),
    ('train_ds', train_ds),
    ('validation_ds', validation_ds)
]

class AddResultsCallback(pl.Callback):
    """Collects the test accuracy of successive test runs, grouped by a label.

    Set the label with ``set_current_dataset_index`` before calling
    ``trainer.test``; while no label is set, test runs are ignored.
    """

    def __init__(self):
        self.current_dataset_index = None
        # label -> list of accuracies, one entry per finished test run
        self.dataset_results = {}

    def set_current_dataset_index(self, index):
        """Choose the label under which the next test result is stored."""
        self.current_dataset_index = index

    def on_test_end(self, trainer, pl_module):
        """Append the accuracy of the finished test run under the current label."""
        label = self.current_dataset_index
        if label is None:
            return

        # NOTE(review): `trainer._results` is a private Lightning attribute and
        # may change between versions.
        metric = trainer._results['test_epoch_end.epoch_test_acc']
        accuracy = metric.value.cpu().numpy()

        if label not in self.dataset_results:
            self.dataset_results[label] = []
        self.dataset_results[label].append(accuracy)

def fit_and_test(fit_datamodule, accum_batches=1, test_only=False):
    """Continue training the global ``model`` from the checkpoint, then test it.

    Training (unless ``test_only``) resumes from ``chkpt_path`` with early
    stopping on ``epoch_val_acc``. Afterwards the model is tested on every
    ``(label, dataset)`` pair of the module-level ``datasets`` list.

    Args:
        fit_datamodule: data module providing the training and validation
            loaders. Its ``test_ds`` is swapped temporarily for each test run
            and restored before returning.
        accum_batches: forwarded to ``pl.Trainer(accumulate_grad_batches=...)``.
        test_only: skip training and test the weights stored in ``chkpt_path``.

    Returns:
        Dict mapping each dataset label to a list holding its test accuracy.
    """
    callback = AddResultsCallback()
    early_stopping = pl.callbacks.EarlyStopping(
        mode='max',
        monitor='epoch_val_acc',
        patience=4,
        min_delta=0.03,
        stopping_threshold=1.0
    )

    # NOTE(review): the checkpoint file name says epoch 189, so 190 is
    # presumably the number of epochs already trained and `ne` the number of
    # additional epochs allowed here - confirm.
    ne = 15

    trainer = pl.Trainer(
        gpus=1, 
        max_epochs=190 + ne,
        accumulate_grad_batches=accum_batches, 
        callbacks=[callback, early_stopping]
    )

    if not test_only:
        trainer.fit(model, fit_datamodule, ckpt_path=chkpt_path)

    # Remember the caller's test set: the loop below replaces it, and without
    # restoring it the data module would be left pointing at the last entry of
    # `datasets`.
    original_test_ds = fit_datamodule.test_ds
    try:
        for index, test_dataset in datasets:
            callback.set_current_dataset_index(index)
            fit_datamodule.test_ds = test_dataset
            trainer.test(model, fit_datamodule, ckpt_path=chkpt_path if test_only else None)
    finally:
        fit_datamodule.test_ds = original_test_ds

    return callback.dataset_results

In [None]:
# Run the four strategies one after another. Each fit_and_test call restores
# the weights from the checkpoint before training, then reports the accuracy on
# every evaluation set.

# Plain fine-tuning on the sampled question/answer pairs.
print('FINETUNING\n')
results_ft = fit_and_test(dm_regular)
print(results_ft)

# Masked declarative sentences derived from the questions.
print('MLM CONVERTED\n')
results_mlm = fit_and_test(dm_masked_sentences)
print(results_mlm)

# Masked "question answer" concatenations.
print('CONCAT\n')
results_ct = fit_and_test(dm_concat)
print(results_ct)

# Question followed by a sentinel, with the answer as the target.
print('MASKED ANSWERS\n')
results_ma = fit_and_test(dm_masked_answers)
print(results_ma)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


FINETUNING



LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "You're resuming from a checkpoint that ended mid-epoch."
Restored all states from the checkpoint file at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt

  | Name | Type                       | Params
----------------------------------------------------
0 | t5   | T5ForConditionalGeneration | 222 M 
----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.026278408244252205}
--------------------------------------------------------------------------------


  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.7780448794364929}
--------------------------------------------------------------------------------


  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 1.0}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.19805139303207397}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.9371299147605896}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.18975454568862915}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


{'incorrect_questions': [array(0.02627841, dtype=float32)], 'correct_questions': [array(0.7780449, dtype=float32)], 'incorrect_sampled_set': [array(1., dtype=float32)], 'full_test_set': [array(0.1980514, dtype=float32)], 'train_ds': [array(0.9371299, dtype=float32)], 'validation_ds': [array(0.18975455, dtype=float32)]}
MLM CONVERTED



LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Restored all states from the checkpoint file at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt

  | Name | Type                       | Params
----------------------------------------------------
0 | t5   | T5ForConditionalGeneration | 222 M 
----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.008167613297700882}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.26362180709838867}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.03125}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.06671205163002014}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.3198959529399872}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.07085908949375153}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


{'incorrect_questions': [array(0.00816761, dtype=float32)], 'correct_questions': [array(0.2636218, dtype=float32)], 'incorrect_sampled_set': [array(0.03125, dtype=float32)], 'full_test_set': [array(0.06671205, dtype=float32)], 'train_ds': [array(0.31989595, dtype=float32)], 'validation_ds': [array(0.07085909, dtype=float32)]}
CONCAT



LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Restored all states from the checkpoint file at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt

  | Name | Type                       | Params
----------------------------------------------------
0 | t5   | T5ForConditionalGeneration | 222 M 
----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.0049715908244252205}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.28205129504203796}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.0}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.06830751895904541}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.3579750657081604}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.07298636436462402}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


{'incorrect_questions': [array(0.00497159, dtype=float32)], 'correct_questions': [array(0.2820513, dtype=float32)], 'incorrect_sampled_set': [array(0., dtype=float32)], 'full_test_set': [array(0.06830752, dtype=float32)], 'train_ds': [array(0.35797507, dtype=float32)], 'validation_ds': [array(0.07298636, dtype=float32)]}
MASKED ANSWERS



LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Restored all states from the checkpoint file at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt

  | Name | Type                       | Params
----------------------------------------------------
0 | t5   | T5ForConditionalGeneration | 222 M 
----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.013139204122126102}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.17147435247898102}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.78125}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.049225661903619766}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.2009289562702179}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.04560000076889992}
--------------------------------------------------------------------------------
{'incorrect_questions': [array(0.0131392, dtype=float32)], 'correct_questions': [array(0.17147435, dtype=float32)], 'incorrect_sampled_set': [array(0.78125, dtype=float32)], 'full_test_set': [array(0.04922566, dtype=float32)], 'train_ds': [array(0.20092896, dtype=float32)], 'validation_ds': [array(0.0456, dtype=float32)]}


In [None]:
# Seed NumPy's global RNG so the sample drawn below is the same on every run.
np.random.seed(1234)

# Draw 32 distinct entries (no replacement) from `correct_answers`.
sampled_correct_answers = np.random.choice(
    correct_answers,
    size=32,
    replace=False,
)

class MergedMaskedAnswerDataset(MaskedAnswerDataset):
    """Masked-answer dataset built from two data sources joined end to end.

    Each source is passed through the inherited ``_mask`` helper and the
    resulting inputs/targets are joined with ``+``, so examples from
    ``data0`` come first, followed by those from ``data1``.

    Args:
        data0: first collection of examples handed to ``_mask``.
        data1: second collection of examples handed to ``_mask``.
        init_mask: mask index forwarded to ``_mask`` for ``data0``.
        use_same_mask: when true, ``data1`` is masked with ``init_mask`` as
            well; otherwise it is masked with ``init_mask + 2``.

    Note:
        The parent initializer is not called here; ``x`` and ``y`` are
        assigned directly.
    """

    def __init__(self, data0, data1, init_mask=0, use_same_mask=True):
        # Pick the mask index for the second source.
        # NOTE(review): the +2 offset presumably skips the two sentinel ids
        # used per example — confirm against MaskedAnswerDataset._mask.
        if use_same_mask:
            mask_for_second = init_mask
        else:
            mask_for_second = init_mask + 2

        inputs_first, targets_first = self._mask(data0, init_mask)
        inputs_second, targets_second = self._mask(data1, mask_for_second)

        # Keep source order: everything from data0, then everything from data1.
        self.x = inputs_first + inputs_second
        self.y = targets_first + targets_second

class MergedNQOpenDataset(NQOpenDataset):
    """NQ-Open dataset whose backing store is two datasets stacked together.

    Args:
        dataset0: first array-like of examples.
        dataset1: second array-like of examples, appended after ``dataset0``.

    Note:
        The parent initializer is not called; ``_dataset`` is assigned
        directly from the concatenation along axis 0.
    """

    def __init__(self, dataset0, dataset1):
        stacked = np.concatenate([dataset0, dataset1], axis=0)
        self._dataset = stacked

def test_merged_masked_answer_dataset():
    """Sanity-check both merged dataset wrappers on the sampled sets.

    Builds a ``MergedMaskedAnswerDataset`` and a ``MergedNQOpenDataset`` from
    the module-level ``sampled_correct_answers`` and
    ``sampled_questions_and_answers``, asserts that each exposes as many
    examples as the two inputs combined, and prints the length plus the first
    and last example of each for visual inspection.

    Raises:
        AssertionError: if either merged dataset has an unexpected length.
    """
    # Neither wrapper should drop or duplicate examples when merging.
    expected_len = len(sampled_correct_answers) + len(sampled_questions_and_answers)

    m = MergedMaskedAnswerDataset(
        sampled_correct_answers,
        sampled_questions_and_answers
    )
    assert len(m) == expected_len, (
        f"masked merge has {len(m)} examples, expected {expected_len}"
    )

    print(len(m))
    print(m[0])
    print(m[-1])

    m2 = MergedNQOpenDataset(
        sampled_correct_answers,
        sampled_questions_and_answers
    )
    assert len(m2) == expected_len, (
        f"plain merge has {len(m2)} examples, expected {expected_len}"
    )

    print(len(m2))
    print(m2[0])
    print(m2[-1])

test_merged_masked_answer_dataset()

64
('who played ashley on the young and the restless <extra_id_0>', ['<extra_id_0> Brenda Epperson <extra_id_1>'])
('who is the girl that died in eyes wide shut <extra_id_0>', ['<extra_id_0> Mandy <extra_id_1>'])
64
('who played ashley on the young and the restless', ['Brenda Epperson', 'Shari Shattuck', 'Eileen Davidson'])
('who is the girl that died in eyes wide shut', ['Mandy'])


In [None]:
# Training set: the sampled correctly-answered examples followed by the
# sampled question/answer pairs, concatenated into one dataset.
# NOTE(review): despite the "masked_answer" in the variable names, this uses
# MergedNQOpenDataset (plain question/answers), not MergedMaskedAnswerDataset
# — confirm this is the intended experiment.
ds_masked_answer_merged = MergedNQOpenDataset(sampled_correct_answers, sampled_questions_and_answers)

# The batch size equals the training-set length, so every training step
# sees the whole merged set at once.
dm_masked_answers_merged = FineTuningDataModule(
    train_ds=ds_masked_answer_merged,
    validation_ds=ds_questions_incorrect_sample,
    test_ds=ds_questions_incorrect,
    collate_fn=collate_fn,
    batch_size=len(ds_masked_answer_merged),
)

print('MERGED ANSWERS')
# Fine-tune on the merged set, then evaluate; metrics are kept for later use.
results_merged = fit_and_test(dm_masked_answers_merged)
print(results_merged)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


MERGED ANSWERS


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "You're resuming from a checkpoint that ended mid-epoch."
Restored all states from the checkpoint file at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt

  | Name | Type                       | Params
----------------------------------------------------
0 | t5   | T5ForConditionalGeneration | 222 M 
----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.0273019727319479}
--------------------------------------------------------------------------------


  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.8087225556373596}
--------------------------------------------------------------------------------


  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 1.0}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.20567645132541656}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.9497199654579163}
--------------------------------------------------------------------------------


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.19504982233047485}
--------------------------------------------------------------------------------
{'incorrect_questions': [array(0.02730197, dtype=float32)], 'correct_questions': [array(0.80872256, dtype=float32)], 'incorrect_sampled_set': [array(1., dtype=float32)], 'full_test_set': [array(0.20567645, dtype=float32)], 'train_ds': [array(0.94971997, dtype=float32)], 'validation_ds': [array(0.19504982, dtype=float32)]}


In [None]:
# Baseline: evaluate with test_only=True on the same data module so the
# numbers can be compared with the fine-tuned run.
# NOTE(review): test_only=True presumably skips training and only runs the
# test stages on the restored checkpoint — confirm in fit_and_test.
print('BASELINE')
# Stored under its own name so the fine-tuned metrics in `results_merged`
# are not overwritten and both result sets stay available for comparison.
results_baseline = fit_and_test(dm_masked_answers_merged, test_only=True)
print(results_baseline)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


BASELINE


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.0}
--------------------------------------------------------------------------------


  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 1.0}
--------------------------------------------------------------------------------


Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.0}
--------------------------------------------------------------------------------


Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.22828103601932526}
--------------------------------------------------------------------------------


Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.9719006419181824}
--------------------------------------------------------------------------------


Restoring states from the checkpoint path at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at drive/MyDrive/checkpoints/checkpoint_189_0.23.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'epoch_test_acc': 0.22500452399253845}
--------------------------------------------------------------------------------
{'incorrect_questions': [array(0., dtype=float32)], 'correct_questions': [array(1., dtype=float32)], 'incorrect_sampled_set': [array(0., dtype=float32)], 'full_test_set': [array(0.22828104, dtype=float32)], 'train_ds': [array(0.97190064, dtype=float32)], 'validation_ds': [array(0.22500452, dtype=float32)]}
