Skip to content

Randomly getting error while generating word timestamps #59

@rahulmate

Description

@rahulmate

code
`model = whisper_s2t.load_model(model_identifier="large-v2", asr_options={'word_timestamps': True},backend='TensorRT-LLM')

files = ['output.wav']
lang_codes = ['en']
tasks = ['transcribe']
initial_prompts = [None]

out = model.transcribe_with_vad(files,
lang_codes=lang_codes,
tasks=tasks,
initial_prompts=initial_prompts,
batch_size=16)`

For the above code, it sometimes throws the error below for the same file. Is there any explanation for this?
`RuntimeError Traceback (most recent call last)
Cell In[15], line 10
8 initial_prompts = [None]
9 start =time.time()
---> 10 out = model.transcribe_with_vad(files,
11 lang_codes=lang_codes,
12 tasks=tasks,
13 initial_prompts=initial_prompts,
14 batch_size=16)
15 end =time.time()
16 print(f"batch :: {16} time:: {end-start}")

File ~/temp_triton/triton_env/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator..decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)

File ~/temp_triton/triton_env/lib/python3.10/site-packages/whisper_s2t/backends/init.py:171, in WhisperModel.transcribe_with_vad(self, audio_files, lang_codes, tasks, initial_prompts, batch_size)
169 for signals, prompts, seq_len, seg_metadata, pbar_update in self.data_loader(audio_files, lang_codes, tasks, initial_prompts, batch_size=batch_size):
170 mels, seq_len = self.preprocessor(signals, seq_len)
--> 171 res = self.generate_segment_batched(mels.to(self.device), prompts, seq_len, seg_metadata)
173 for res_idx, _seg_metadata in enumerate(seg_metadata):
174 responses[_seg_metadata['file_id']].append({**res[res_idx],
175 'start_time': round(_seg_metadata['start_time'], 3),
176 'end_time': round(_seg_metadata['end_time'], 3)})

File ~/temp_triton/triton_env/lib/python3.10/site-packages/whisper_s2t/backends/tensorrt/model.py:248, in WhisperModelTRT.generate_segment_batched(self, features, prompts, seq_lens, seg_metadata)
246 text_tokens = [[_t for _t in x[0] if _t < self.tokenizer.eot]+[self.tokenizer.eot] for x in result]
247 sot_seqs = [tuple(_[-4:]) for _ in prompts]
--> 248 word_timings = self.align_words(features, texts, text_tokens, sot_seqs, seq_lens, seg_metadata)
250 for _response, _word_timings in zip(response, word_timings):
251 _response['word_timestamps'] = _word_timings

File ~/temp_triton/triton_env/lib/python3.10/site-packages/whisper_s2t/backends/tensorrt/model.py:200, in WhisperModelTRT.align_words(self, features, texts, text_tokens, sot_seqs, seq_lens, seg_metadata)
198 token_alignments = [[] for _ in seg_metadata]
199 for start_seq, req_idx in start_seq_wise_req.items():
--> 200 res = self.aligner_model.align(ctranslate2.StorageView.from_array(features[req_idx]),
201 start_sequence=list(start_seq),
202 text_tokens=[text_tokens[_] for _ in req_idx],
203 num_frames=list(seq_lens[req_idx].detach().cpu().numpy()),
204 median_filter_width=7)
206 for _res, _req_idx in zip(res, req_idx):
207 token_alignments[_req_idx] = _res

RuntimeError: No position encodings are defined for positions >= 448, but got position 454`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions