In [1]:
from tokenizer import SentencePieceTokenizer
from model import CausalConformerModel
from data import LibriSpeechDataset, get_dataloader
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Notebook configuration -------------------------------------------------
# Trained SentencePiece model used to map token ids <-> text.
TOKENIZER_MODEL_FILE_PATH = "./vocabs/librispeech_1024_bpe.model"
# JSON manifest consumed by LibriSpeechDataset.
DATASET_JSON_FILE_PATH = "./json/librispeech_train-clean-100.json"
# Checkpoint that holds both the weights ("model") and constructor kwargs ("model_args").
MODEL_FILE_PATH = "./artifacts/librispeech_small_check/f9c410c7599f48ca99e541afadcc4ebd/artifacts/model_25.pth"
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines
# without CUDA instead of failing at the first `.to(DEVICE)` call.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
# Tokenizer backed by the SentencePiece model file configured above.
tokenizer = SentencePieceTokenizer(model_file_path=TOKENIZER_MODEL_FILE_PATH)

# Dataset described by the JSON manifest; transcripts are encoded with `tokenizer`.
# NOTE(review): resampling_rate is presumably in Hz (16 kHz) — confirm in data.LibriSpeechDataset.
dataset = LibriSpeechDataset(
    json_file_path=DATASET_JSON_FILE_PATH,
    tokenizer=tokenizer,
    resampling_rate=16000,
)

# NOTE(review): batch_sec=30 presumably bounds the total audio seconds per batch —
# confirm against data.get_dataloader.
dataloader = get_dataloader(
    dataset,
    pad_idx=tokenizer.pad_token_id,
    batch_sec=30,
    num_workers=8,
    pin_memory=True,
)

Batch Prepare: 100%|██████████| 28539/28539 [00:00<00:00, 342325.33it/s]


In [4]:
# Restore the trained model from the checkpoint file.
# map_location=DEVICE remaps the stored tensors onto the target device, so a
# checkpoint written on a GPU can still be opened when DEVICE is "cpu".
# NOTE(security): torch.load unpickles the file; only open checkpoints you trust.
with open(MODEL_FILE_PATH, "rb") as f:
    cpt = torch.load(f, map_location=DEVICE)
model_state = cpt["model"]  # state dict with the trained weights
model_args = cpt["model_args"]  # keyword arguments the model was constructed with
model = CausalConformerModel(**model_args).to(DEVICE)
# Put the module in evaluation mode before decoding so that train-only behaviour
# (e.g. dropout) is disabled. Called before load_state_dict so that the load
# result stays the cell's displayed value.
model.eval()
model.load_state_dict(model_state)

<All keys matched successfully>

In [5]:
# Decode one batch with streaming greedy search and print each hypothesis next to
# its reference transcript.
_, benc_input, bpred_input, benc_input_length, bpred_input_length, baudio_sec = next(iter(dataloader))
benc_input = benc_input.to(DEVICE)
bpred_input = bpred_input.to(DEVICE)

# Inference only: disable autograd so no computation graph is recorded while decoding.
with torch.no_grad():
    bhyp_token_indices = model.streaming_greedy_inference(
        enc_inputs=benc_input, enc_input_lengths=benc_input_length
    )
# Reference token ids: cut each padded row back to its true length.
bans_token_indices = [
    bpred_input[i, : bpred_input_length[i]].tolist() for i in range(bpred_input.shape[0])
]
bhyp_text = tokenizer.batch_token_ids_to_text(bhyp_token_indices)
bans_text = tokenizer.batch_token_ids_to_text(bans_token_indices)
for hyp, ans in zip(bhyp_text, bans_text):
    print(f"hyp: {hyp}")
    print(f"ans: {ans}")
    print()

hyp: and drive me to invent falsehoods and replied but all this was suddenly brought to an end for the time came when all such considerations were disrearted and there was no further question of honor when my patience gave way and the secret of my heart became known abroad
ans: and drive me to invent falsehoods in reply but all this was suddenly brought to an end for the time came when all such considerations were disregarded and there was no further question of honour when my patience gave way and the secret of my heart became known abroad

hyp: that would be turning your visit into an evil indeed wherever you are you should always be contented but especially at home because there you must spend the most of your time i did not quite like
ans: that would be turning your visit into an evil indeed wherever you are you should always be contented but especially at home because there you must spend the most of your time i did not quite like at breakfast



In [6]:
# Decode the same batch with beam search (beam_size=2), keeping the n-best token
# sequences per utterance for the comparison cell below.
# Inference only: disable autograd so no computation graph is recorded while decoding.
with torch.no_grad():
    bhyp_nbest_token_indices = model.beamsearch_inference(
        enc_inputs=benc_input, enc_input_lengths=benc_input_length, tokenizer=tokenizer, beam_size=2
    )
# Reference token ids: cut each padded row back to its true length, then detokenize.
bans_token_indices = [
    bpred_input[i, : bpred_input_length[i]].tolist() for i in range(bpred_input.shape[0])
]
bans_text = tokenizer.batch_token_ids_to_text(bans_token_indices)

353 best hypothesis: and dried me to advant falsehoods in reply but all this was suddenly brought to an end for the time came when all such considerations were disregarded and there was no further question of honor when my patience gave way and the secret of my heart became known abroad
364 best hypothesis: that would be turning your visit into an evil indeed wherever you are you should always be contented but especially at home because there you must spend the most of your time i did not quite like at breakfast


In [8]:
# For every utterance, show the reference transcript followed by each beam-search
# hypothesis in the order the n-best list provides them.
for utt_idx in range(len(bans_text)):
    print(f"ans: {bans_text[utt_idx]}")
    nbest_token_ids = bhyp_nbest_token_indices[utt_idx]
    for rank, token_ids in enumerate(nbest_token_ids):
        decoded = tokenizer.token_ids_to_text(token_ids)
        print(f"hyp{rank}: {decoded}")

ans: and drive me to invent falsehoods in reply but all this was suddenly brought to an end for the time came when all such considerations were disregarded and there was no further question of honour when my patience gave way and the secret of my heart became known abroad
hyp0: and dried me to advant falsehoods in reply but all this was suddenly brought to an end for the time came when all such considerations were disregarded and there was no further question of honor when my patience gave way and the secret of my heart became known abroad
hyp1: and dried me to advant falsehoods in reply but all this was suddenly brought to an end for the time came when all such considerations were disregarded and there was no further question of honor when my patience gave way and the secret of my heart became known abro
ans: that would be turning your visit into an evil indeed wherever you are you should always be contented but especially at home because there you must spend the most of your time i d