In [7]:
import test_bert
import pprint
import re
from collections.abc import Iterator
from functools import partial
from pathlib import Path

In [51]:
from train_tempobert import ModelArguments
import hf_utils
from bert_model import BertModel
from transformers import AutoModelForMaskedLM, pipeline


In [37]:
def predict_time(sentence, fill_mask_pipelines, print_results=True):
    if not isinstance(fill_mask_pipelines, list):
        fill_mask_pipelines = [fill_mask_pipelines]
    time_tokens = [f"<{time}>" for time in fill_mask_pipelines[0].model.config.times]
    result_dict = {}
    original_sentence = sentence
    sentence = "[MASK] " + sentence
    for model_i, fill_mask in enumerate(fill_mask_pipelines):
        fill_result = fill_mask(sentence, targets=time_tokens, truncation=True)
        result = {res["token_str"]: res["score"] for res in fill_result}
        if len(fill_mask_pipelines) == 1:
            result_dict = result
        else:
            result_dict[model_i] = result
        if print_results:
            res_str = ', '.join(
                f'{token} ({score:.2f})' for token, score in result.items()
            )
            if len(fill_mask_pipelines) > 1:
                print(f"{model_i}: {original_sentence}: {res_str}")
            else:
                print(f"{original_sentence}: {res_str}")
    return result_dict

In [40]:
result_dict = None
tester = test_bert.Tester('output/TempoBERT_2023-10-18_14-49-45/', device=0)
for fill_mask_pipeline in tester.fill_mask_pipelines:
    print(fill_mask_pipeline)
    result_dict = test_bert.predict_time(
        'Tracy did not repaire to hir hous that even and withstode Rileys berserkes', fill_mask_pipeline, print_results=False
    )
    print(result_dict)

2023-10-18 22:02:44.714 | INFO     | hf_utils:load_pretrained_model:123 - Loaded a pretrained model from output/TempoBERT_2023-10-18_14-49-45/


<transformers.pipelines.fill_mask.FillMaskPipeline object at 0x7fae65f7fc40>
{'<1990>': 0.3611648976802826, '<2000>': 0.2682936191558838, '<2010>': 0.22784477472305298, '<2020>': 0.05866082385182381}


In [52]:
def load_model(model_name_or_path, expect_times_in_model=True):
    model_args = ModelArguments(model_name_or_path=model_name_or_path)
    config_kwargs = {}
    model, tokenizer = hf_utils.load_pretrained_model(
        model_args,
        AutoModelForMaskedLM,
        expect_times_in_model=expect_times_in_model,
        **config_kwargs,
    )
    return model, tokenizer

In [58]:
model, tokenizer = load_model('output/TempoBERT_2023-10-18_14-49-45/')

2023-10-18 22:23:19.212 | INFO     | hf_utils:load_pretrained_model:123 - Loaded a pretrained model from output/TempoBERT_2023-10-18_14-49-45/


In [59]:
def predict_time(sentence, model, tokenizer, print_results=True):
    # if not isinstance(fill_mask_pipelines, list):
    #     fill_mask_pipelines = [fill_mask_pipelines]
    time_tokens = [f"<{time}>" for time in model.config.times]
    result_dict = {}
    original_sentence = sentence 
    sentence = "[MASK] " + sentence
    pret_time_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer, device=0)
    out_time_preds = pret_time_mask(sentence, targets=time_tokens)
    result = {res["token_str"]: res["score"] for res in out_time_preds}
    result_dict = {"sentence":sentence, "time_scores":result}
    print(result_dict)
    return result_dict

result_dict = predict_time(
    'Tracy did not repaire to hir hous that even and withstode Rileys berserkes', model, tokenizer, print_results=False
)

{'sentence': '[MASK] Tracy did not repaire to hir hous that even and withstode Rileys berserkes', 'time_scores': {'<1990>': 0.3611648976802826, '<2000>': 0.2682936191558838, '<2010>': 0.22784477472305298, '<2020>': 0.05866082385182381}}


In [60]:
def predict_time(sentence, model, tokenizer, print_results=True):
    # if not isinstance(fill_mask_pipelines, list):
    #     fill_mask_pipelines = [fill_mask_pipelines]
    time_tokens = [f"<{time}>" for time in model.config.times]
    result_dict = {}
    original_sentence = sentence 
    sentence = "[MASK] " + sentence
    pret_time_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer, device=0)
    out_time_preds = pret_time_mask(sentence, targets=time_tokens)
    result = {res["token_str"]: res["score"] for res in out_time_preds}
    result_dict = {"sentence":sentence, "time_scores":result}
    print(result_dict)
    return result_dict

result_dict = predict_time(
    'Kendall\'s furry friend was carrying a few extra pounds, so they took it on a five-mile hike to burn off some calories', model, tokenizer, print_results=False
)

{'sentence': "[MASK] Kendall's furry friend was carrying a few extra pounds, so they took it on a five-mile hike to burn off some calories", 'time_scores': {'<1990>': 0.3623791038990021, '<2000>': 0.31215235590934753, '<2010>': 0.26683083176612854, '<2020>': 0.018203917890787125}}


In [61]:
def predict_time(sentence, model, tokenizer, print_results=True):
    # if not isinstance(fill_mask_pipelines, list):
    #     fill_mask_pipelines = [fill_mask_pipelines]
    time_tokens = [f"<{time}>" for time in model.config.times]
    result_dict = {}
    original_sentence = sentence 
    sentence = "[MASK] " + sentence
    pret_time_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer, device=0)
    out_time_preds = pret_time_mask(sentence, targets=time_tokens)
    result = {res["token_str"]: res["score"] for res in out_time_preds}
    result_dict = {"sentence":sentence, "time_scores":result}
    print(result_dict)
    return result_dict

result_dict = predict_time(
    'Kendall\'s pup was a bit too plump, so they had to walk it for five miles to help it shed some pounds', model, tokenizer, print_results=False
)

{'sentence': "[MASK] Kendall's pup was a bit too plump, so they had to walk it for five miles to help it shed some pounds", 'time_scores': {'<2000>': 0.38748985528945923, '<1990>': 0.2741401195526123, '<2010>': 0.26480770111083984, '<2020>': 0.02392939291894436}}


In [62]:
def predict_time(sentence, model, tokenizer, print_results=True):
    # if not isinstance(fill_mask_pipelines, list):
    #     fill_mask_pipelines = [fill_mask_pipelines]
    time_tokens = [f"<{time}>" for time in model.config.times]
    result_dict = {}
    original_sentence = sentence 
    sentence = "[MASK] " + sentence
    pret_time_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer, device=0)
    out_time_preds = pret_time_mask(sentence, targets=time_tokens)
    result = {res["token_str"]: res["score"] for res in out_time_preds}
    result_dict = {"sentence":sentence, "time_scores":result}
    print(result_dict)
    return result_dict

result_dict = predict_time(
    ' Kendall\'s dog was overweight so they walked it five miles.', model, tokenizer, print_results=False
)

{'sentence': "[MASK]  Kendall's dog was overweight so they walked it five miles.", 'time_scores': {'<2000>': 0.3820279836654663, '<2010>': 0.2790084183216095, '<1990>': 0.250939279794693, '<2020>': 0.03328210115432739}}
