In [72]:
import torch
import os.path
from spacy.lang.en import English
import torch.nn.functional as F
from src.models.nliclassifier import NLIClassifier
from src.dataset.dataloaders import get_embeddings_for_data

In [73]:
# Directory that holds the embedding vocab + vector files used for training;
# change this path if the processed data lives somewhere else.
processed_dir = os.path.join("..", "data", "processed")
emb_vocab, emb_vecs = get_embeddings_for_data(dataset_path=processed_dir)

In [74]:
# Restore the trained classifier from its checkpoint, handing it the embedding
# matrix loaded above.
checkpoint_path = os.path.join(
    "..",
    "checkpoint",
    "verify_me_train",
    "epoch=02-step=25752-val_loss=0.78.ckpt",
)
# NOTE(review): strict=False is forwarded to load_from_checkpoint; presumably it
# relaxes state-dict key matching because the embeddings are supplied
# separately via embedding_mat -- confirm.
restored = NLIClassifier.load_from_checkpoint(
    checkpoint_path,
    strict=False,
    embedding_mat=emb_vecs,
)
# Inference runs on CPU with the module switched to evaluation mode.
model = restored.cpu().eval()

In [75]:
# Only the tokenizer of spaCy's English language class is needed here.
english = English()
tokenizer = english.tokenizer


def process_input(sent_1, sent_2):
    """Turn two raw sentences into index tensors for the model.

    Each sentence is lower-cased, split with the module-level ``tokenizer``,
    and its token strings are passed to ``emb_vocab`` to obtain indices.

    Args:
        sent_1: First sentence, as a string.
        sent_2: Second sentence, as a string.

    Returns:
        A two-element list ``[tensor_1, tensor_2]`` of ``torch.long`` tensors,
        in the same order as the arguments. Each tensor wraps the looked-up
        indices in an extra leading dimension of size 1.
    """
    encoded = []
    for sentence in (sent_1, sent_2):
        words = [tok.text for tok in tokenizer(sentence.lower())]
        word_ids = emb_vocab(words)
        # Wrapping in a list adds the leading (batch-like) dimension.
        encoded.append(torch.tensor([word_ids], dtype=torch.long))
    return encoded

In [76]:
# Example sentence pair to classify, and its tensor encoding.
sent_1, sent_2 = (
    "An apple and an orange on a table",
    "There are fruits present on the table",
)
raw_inputs = process_input(sent_1, sent_2)

In [77]:
# Class index -> human-readable label; the position in the tuple is the index.
label_map = dict(enumerate(("entailment", "neutral", "contradiction")))

In [78]:
# Single forward pass for the example pair. Gradients are not needed for
# inference, so autograd is disabled instead of building a graph and
# detaching afterwards.
# NOTE(review): the trailing `2, 2` are passed positionally to the model's
# forward; their meaning is not visible in this notebook. If they are sequence
# lengths they should match the token counts of the two inputs -- confirm
# against NLIClassifier.forward, since the recorded output for this pair
# ("contradiction") looks suspicious.
with torch.no_grad():
    out = model(*raw_inputs, 2, 2)
    probs = F.softmax(out, dim=-1)
# One example in the batch, so the argmax holds one element and .item()
# yields the predicted class index as a plain Python int.
label = torch.argmax(probs, dim=-1).item()

In [79]:
# Show both sentences next to the label name of the predicted class.
report = f"""
Sentence one: "{sent_1}"
Sentence two: "{sent_2}"
Model judgement: "{label_map[label]}"
"""
print(report)


Sentence one: "An apple and an orange on a table"
Sentence two: "There are fruits present on the table"
Model judgement: "contradiction"

