From c878998f970e2b9b01e74d5e35b2d95e0f634216 Mon Sep 17 00:00:00 2001 From: juanmc2005 Date: Sat, 27 May 2023 17:04:42 +0200 Subject: [PATCH] Replace block_size by block_duration in audio source constructors --- src/diart/blocks/base.py | 3 --- src/diart/console/client.py | 2 +- src/diart/console/stream.py | 3 +-- src/diart/inference.py | 2 +- src/diart/sources.py | 16 ++++++++-------- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/diart/blocks/base.py b/src/diart/blocks/base.py index 11ef961d..c23ce032 100644 --- a/src/diart/blocks/base.py +++ b/src/diart/blocks/base.py @@ -58,9 +58,6 @@ def get_file_padding(self, filepath: FilePath) -> Tuple[float, float]: left = utils.get_padding_left(file_duration + right, self.duration) return left, right - def optimal_block_size(self) -> int: - return int(np.rint(self.step * self.sample_rate)) - class Pipeline: @staticmethod diff --git a/src/diart/console/client.py b/src/diart/console/client.py index eba17020..d1896ec6 100644 --- a/src/diart/console/client.py +++ b/src/diart/console/client.py @@ -15,7 +15,7 @@ def send_audio(ws: WebSocket, source: Text, step: float, sample_rate: int): # Create audio source source_components = source.split(":") if source_components[0] != "microphone": - audio_source = src.FileAudioSource(source, sample_rate) + audio_source = src.FileAudioSource(source, sample_rate, block_duration=step) else: device = int(source_components[1]) if len(source_components) > 1 else None audio_source = src.MicrophoneAudioSource(step, device) diff --git a/src/diart/console/stream.py b/src/diart/console/stream.py index d270774b..fd7df5eb 100644 --- a/src/diart/console/stream.py +++ b/src/diart/console/stream.py @@ -40,13 +40,12 @@ def run(): pipeline = pipeline_class(config) # Manage audio source - block_size = config.optimal_block_size() source_components = args.source.split(":") if source_components[0] != "microphone": args.source = Path(args.source).expanduser() args.output = 
args.source.parent if args.output is None else Path(args.output) padding = config.get_file_padding(args.source) - audio_source = src.FileAudioSource(args.source, config.sample_rate, padding, block_size) + audio_source = src.FileAudioSource(args.source, config.sample_rate, padding, config.step) pipeline.set_timestamp_shift(-padding[0]) else: args.output = Path("~/").expanduser() if args.output is None else Path(args.output) diff --git a/src/diart/inference.py b/src/diart/inference.py index 4a3a937b..99e5c757 100644 --- a/src/diart/inference.py +++ b/src/diart/inference.py @@ -320,7 +320,7 @@ def run_single( filepath, pipeline.config.sample_rate, padding, - pipeline.config.optimal_block_size(), + pipeline.config.step, ) pipeline.set_timestamp_shift(-padding[0]) inference = StreamingInference( diff --git a/src/diart/sources.py b/src/diart/sources.py index 490591e6..f14e2a4f 100644 --- a/src/diart/sources.py +++ b/src/diart/sources.py @@ -55,23 +55,23 @@ class FileAudioSource(AudioSource): padding: (float, float) Left and right padding to add to the file (in seconds). Defaults to (0, 0). - block_size: int - Number of samples per chunk emitted. - Defaults to 1000. + block_duration: float + Duration of each emitted chunk in seconds. + Defaults to 0.5 seconds. """ def __init__( self, file: FilePath, sample_rate: int, padding: Tuple[float, float] = (0, 0), - block_size: int = 1000, + block_duration: float = 0.5, ): super().__init__(Path(file).stem, sample_rate) self.loader = AudioLoader(self.sample_rate, mono=True) self._duration = self.loader.get_duration(file) self.file = file self.resolution = 1 / self.sample_rate - self.block_size = block_size + self.block_size = int(np.rint(block_duration * self.sample_rate)) self.padding_start, self.padding_end = padding self.is_closed = False @@ -132,7 +132,7 @@ class MicrophoneAudioSource(AudioSource): Parameters ---------- block_duration: int - Duration of each chunk emitted in seconds. + Duration of each emitted chunk in seconds. 
Defaults to 0.5 seconds. device: int | str | (int, str) | None Device identifier compatible for the sounddevice stream. @@ -268,10 +268,10 @@ def __init__( sample_rate: int, streamer: StreamReader, stream_index: Optional[int] = None, - block_size: int = 1000, + block_duration: float = 0.5, ): super().__init__(uri, sample_rate) - self.block_size = block_size + self.block_size = int(np.rint(block_duration * self.sample_rate)) self._streamer = streamer self._streamer.add_basic_audio_stream( frames_per_chunk=self.block_size,