In [1]:
%%sh
# Print the current working directory; the relative data path used later in
# this notebook ("../data") is resolved against it.
pwd

/home/ubuntu/whisper-dev/whisper-stream/notebooks


In [2]:
# IPython extensions for this session:
# - autoreload: provides the `%autoreload` magic that is enabled after the imports.
# - viztracer: presumably registers the VizTracer profiling magic; it is not
#   invoked in the cells visible here — TODO confirm it is still needed.
%load_ext autoreload
%load_ext viztracer

In [3]:
import jax

# Report how many devices JAX can see and the kind of the first one, so the
# timings below can be tied to the hardware they were measured on.
(
    jax.device_count(),
    jax.devices()[0].device_kind,
)

(1, 'NVIDIA A10G')

In [4]:
from whisper_stream.core.helpers.data_loading import load_data_samples_from_path
from whisper_stream.vendored.whisper_jax import (
    FlaxWhisperPipeline,
)
from whisper_stream.pipelines.jax_pipelines.constants import (
    JAXValidDtypesMapping,
    JAXScalarDType,
)

from whisper_stream.core.constants import WhisperValidCheckpoints, WhisperValidTasks

from whisper_stream.core.logger import LOG_LEVEL_NAMES
from pathlib import Path

# Mode 2: reload all modules (except those excluded via %aimport) before each
# cell executes, so edits to the package source are picked up without
# restarting the kernel.
%autoreload 2

event='creating directories for package' application='whisper_stream' version='0.0.4' python_version='3.11.5' platform_architecture=('64bit', 'ELF') path=PosixPath('/home/ubuntu/.whisper_stream') level='info' timestamp='2023-09-03T16:44:50.852595Z'
event='creating directories for package' application='whisper_stream' version='0.0.4' python_version='3.11.5' platform_architecture=('64bit', 'ELF') path=PosixPath('/home/ubuntu/.whisper_stream/.cache') level='info' timestamp='2023-09-03T16:44:50.853330Z'
event='creating directories for package' application='whisper_stream' version='0.0.4' python_version='3.11.5' platform_architecture=('64bit', 'ELF') path=PosixPath('/home/ubuntu/.whisper_stream/.data') level='info' timestamp='2023-09-03T16:44:50.853811Z'
event='finished setting up package directories' application='whisper_stream' version='0.0.4' python_version='3.11.5' platform_architecture=('64bit', 'ELF') level='info' timestamp='2023-09-03T16:44:50.854405Z'
event='creating directories for

In [5]:
# Benchmark configuration
# -----------------------
# Model checkpoint and numeric precision.
checkpoint: WhisperValidCheckpoints = "openai/whisper-large-v2"
model_dtype: JAXScalarDType = JAXValidDtypesMapping["BFLOAT16"]

# Decoding options.
task: WhisperValidTasks = "transcribe"
language: str = "english"
return_timestamps: bool = True
batch_size: int = 1

# Log level name (not referenced by the cells visible in this notebook excerpt).
log_level: LOG_LEVEL_NAMES = "INFO"

# Sample audio location, relative to the current working directory.
data_directory = Path("../data")

# Keyword arguments forwarded to each `pipeline(...)` call in the timing cells.
run_opts = dict(
    batch_size=batch_size,
    return_timestamps=return_timestamps,
    language=language,
    task=task,
)

# Instantiate the Flax Whisper pipeline with the checkpoint, dtype and batch
# size chosen in the configuration section of this cell.
pipeline = FlaxWhisperPipeline(
    batch_size=batch_size,
    dtype=model_dtype,
    checkpoint=checkpoint,
)

In [6]:
# Read the two benchmark clips from the data directory in binary mode.

# Short clip (annotated as roughly 2 s).
pipeline_data: bytes = load_data_samples_from_path(
    "audio_2.mp3",
    binary_mode=True,
    directory=data_directory,
)

# Long clip (annotated as 4:44); used below as the "chunkable" input.
pipeline_data_large: bytes = load_data_samples_from_path(
    "tryst.mp3",
    binary_mode=True,
    directory=data_directory,
)

In [7]:
# Initialize & warm up: the first call is far slower than the repeat calls in
# the following cells (presumably one-off compilation/initialisation — TODO
# confirm), so this timing is not representative of steady-state latency.
%time pipeline(pipeline_data, **run_opts)

CPU times: user 1min 5s, sys: 1.8 s, total: 1min 7s
Wall time: 53 s


{'text': ' I know all the players of cricket.',
 'chunks': [{'timestamp': (0.0, 2.8),
   'text': ' I know all the players of cricket.'}]}

In [8]:
# Second run on the same clip: the pipeline should be warmed up now, so the
# timing should be close to the "small data" measurement in the next cell.
%time pipeline(pipeline_data, **run_opts)

CPU times: user 478 ms, sys: 370 ms, total: 849 ms
Wall time: 442 ms


{'text': ' I know all the players of cricket.',
 'chunks': [{'timestamp': (0.0, 2.8),
   'text': ' I know all the players of cricket.'}]}

In [9]:
# Small data: time a single call on the short clip with the pipeline already
# warmed up by the earlier cells.
%time pipeline(pipeline_data, **run_opts)

CPU times: user 472 ms, sys: 451 ms, total: 922 ms
Wall time: 468 ms


{'text': ' I know all the players of cricket.',
 'chunks': [{'timestamp': (0.0, 2.8),
   'text': ' I know all the players of cricket.'}]}

In [10]:
# small data in batch
%time list([pipeline(data, **run_opts) for data in [pipeline_data] * 10])

CPU times: user 4.68 s, sys: 4.07 s, total: 8.74 s
Wall time: 4.47 s


[{'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'text': ' I know all the players of cricket.',
  'chunks': [{'timestamp': (0.0, 2.8),
    'text': ' I know all the players of cricket.'}]},
 {'tex

In [11]:
# Chunkable data: time a single call on the long clip (presumably split into
# chunks inside the pipeline — TODO confirm).
%time pipeline(pipeline_data_large, **run_opts)

CPU times: user 19.8 s, sys: 6.42 s, total: 26.3 s
Wall time: 19.2 s


{'text': ' Long years ago, we made a truce with destiny, and now the time comes when we shall redeem our pledge, not only or in full measure, but very substantially. At the stroke of the midnight hour, when the world sleeps, India will awake to life and freedom. A moment comes, which comes but rarely in history, when we step out from the old to the new, when an age end, and when the soul of a nation, long suppressed, finds utterance. It is fitting that at this solemn moment we take the pledge of dedication to the service of India and her people, and to the still larger cause of humanity. At the dawn of history, India started on her unending quest, and trackless centuries are filled with her striving and the grandeur of her successes and her failures. Through good and ill fortune alike, she has never lost sight of that quest or forgotten the ideals which gave her strength. We end today a period of ill fortune, and India discovers herself again. The achievement we celebrate today is but 

In [12]:
# chunkable data in batches
%time list([pipeline(data, **run_opts) for data in [pipeline_data_large] * 10])

CPU times: user 3min 19s, sys: 1min 3s, total: 4min 23s
Wall time: 3min 12s


[{'text': ' Long years ago, we made a truce with destiny, and now the time comes when we shall redeem our pledge, not only or in full measure, but very substantially. At the stroke of the midnight hour, when the world sleeps, India will awake to life and freedom. A moment comes, which comes but rarely in history, when we step out from the old to the new, when an age end, and when the soul of a nation, long suppressed, finds utterance. It is fitting that at this solemn moment we take the pledge of dedication to the service of India and her people, and to the still larger cause of humanity. At the dawn of history, India started on her unending quest, and trackless centuries are filled with her striving and the grandeur of her successes and her failures. Through good and ill fortune alike, she has never lost sight of that quest or forgotten the ideals which gave her strength. We end today a period of ill fortune, and India discovers herself again. The achievement we celebrate today is but

: 