# Imports

In [5]:
import torch
from torchtext import data

from model.predictor import Predictor
from model.trainer import Trainer, get_predictor_predictions

from utils import torch_utils

# Load Saved Model

In [3]:
# Directory of the training run whose saved predictor should be loaded.
# Change this to point at whichever run you want to inspect.
# Keep the trailing "/": the file names "best_model.pt" and "config.json"
# are appended to this value by plain string concatenation.
best_model_path = "saved_models/tacred/DualRE/0.1_0.5/09-04_12-04/predictor/"

In [None]:
# Locations of the artifacts inside the selected run directory.
model_file = f"{best_model_path}best_model.pt"
# NOTE(review): model_config is not read anywhere in this cell; the options
# below come from the checkpoint file instead. Confirm whether it is still needed.
model_config = f"{best_model_path}config.json"
model_type = "predictor"

# The option dictionary is loaded from the checkpoint path itself
# (load_config is called with model_file, not model_config).
model_opt = torch_utils.load_config(model_file)

# Build an untrained predictor from those options, wrap it in a Trainer,
# then let the Trainer load the saved state from the checkpoint.
predictor = Predictor(model_opt)
model = Trainer(
    model_opt,
    predictor,
    model_type=model_type,
)
model.load(model_file)

# Load Datasets

In [7]:
# ---------------------------------------------------------------------------
# Field definitions: one torchtext Field per kind of column that is loaded.
# ---------------------------------------------------------------------------
TOKEN = data.Field(
    sequential=True,
    batch_first=True,
    lower=True,
    include_lengths=True,
)
# NOTE(review): unk_token is left at its default here, so the relation
# vocabulary presumably gets an extra "<unk>" entry in addition to the real
# labels. Confirm this matches the field definition used at training time,
# otherwise label indices would be shifted.
RELATION = data.Field(sequential=False, pad_token=None)
POS = data.Field(sequential=True, batch_first=True)
NER = data.Field(sequential=True, batch_first=True)
# A single PST field is shared by "subj_pst" and "obj_pst" below, so both
# columns use the same vocabulary.
PST = data.Field(sequential=True, batch_first=True)
# Confidence columns are numeric: no vocabulary, stored as floats.
PR_CONFIDENCE = data.Field(sequential=False, use_vocab=False, dtype=torch.float)
SL_CONFIDENCE = data.Field(sequential=False, use_vocab=False, dtype=torch.float)

# Mapping: JSON key -> (attribute name on each example, Field that processes it).
FIELDS = dict(
    tokens=("token", TOKEN),
    stanford_pos=("pos", POS),
    stanford_ner=("ner", NER),
    relation=("relation", RELATION),
    subj_pst=("subj_pst", PST),
    obj_pst=("obj_pst", PST),
    pr_confidence=("pr_confidence", PR_CONFIDENCE),
    sl_confidence=("sl_confidence", SL_CONFIDENCE),
)

# ---------------------------------------------------------------------------
# Datasets. All files live under the data directory recorded in model_opt.
# ---------------------------------------------------------------------------
# train.json is loaded separately and used only for building vocabularies.
dataset_vocab = data.TabularDataset(
    path=f'{model_opt["data_dir"]}/train.json',
    format="json",
    fields=FIELDS,
)
# The training split that is actually evaluated is the "train-<labeled_ratio>.json" file.
dataset_train = data.TabularDataset(
    path=f'{model_opt["data_dir"]}/train-{model_opt["labeled_ratio"]}.json',
    format="json",
    fields=FIELDS,
)
dataset_dev = data.TabularDataset(
    path=f'{model_opt["data_dir"]}/dev.json',
    format="json",
    fields=FIELDS,
)
dataset_test = data.TabularDataset(
    path=f'{model_opt["data_dir"]}/test.json',
    format="json",
    fields=FIELDS,
)

# ---------------------------------------------------------------------------
# Vocabularies: built from dataset_vocab (train.json), not from the
# labeled-ratio subset. The confidence fields have use_vocab=False and are
# therefore skipped.
# ---------------------------------------------------------------------------
for vocab_field in (TOKEN, RELATION, POS, NER, PST):
    vocab_field.build_vocab(dataset_vocab)

# Attach pretrained GloVe vectors to the token vocabulary, cached on disk
# under ./dataset/.vectors_cache.
TOKEN.vocab.load_vectors("glove.840B.300d", cache="./dataset/.vectors_cache")

100%|█████████▉| 2196016/2196017 [02:48<00:00, 13021.94it/s]


# Generate Predictions

In [12]:
# Run the loaded model over each split, in the order train -> dev -> test.
# Each call yields a pair that is unpacked into (golds_*, predictions_*);
# presumably gold labels and model predictions. Confirm the exact contents
# in model.trainer.get_predictor_predictions.
(
    (golds_train, predictions_train),
    (golds_dev, predictions_dev),
    (golds_test, predictions_test),
) = [
    get_predictor_predictions(model, split)
    for split in (dataset_train, dataset_dev, dataset_test)
]