Skip to content

Commit

Permalink
Replace block_size by block_duration in audio source constructors
Browse files Browse the repository at this point in the history
  • Loading branch information
juanmc2005 committed May 27, 2023
1 parent f3666f7 commit c878998
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 15 deletions.
3 changes: 0 additions & 3 deletions src/diart/blocks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ def get_file_padding(self, filepath: FilePath) -> Tuple[float, float]:
left = utils.get_padding_left(file_duration + right, self.duration)
return left, right

def optimal_block_size(self) -> int:
return int(np.rint(self.step * self.sample_rate))


class Pipeline:
@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion src/diart/console/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def send_audio(ws: WebSocket, source: Text, step: float, sample_rate: int):
# Create audio source
source_components = source.split(":")
if source_components[0] != "microphone":
audio_source = src.FileAudioSource(source, sample_rate)
audio_source = src.FileAudioSource(source, sample_rate, block_duration=step)
else:
device = int(source_components[1]) if len(source_components) > 1 else None
audio_source = src.MicrophoneAudioSource(step, device)
Expand Down
3 changes: 1 addition & 2 deletions src/diart/console/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,12 @@ def run():
pipeline = pipeline_class(config)

# Manage audio source
block_size = config.optimal_block_size()
source_components = args.source.split(":")
if source_components[0] != "microphone":
args.source = Path(args.source).expanduser()
args.output = args.source.parent if args.output is None else Path(args.output)
padding = config.get_file_padding(args.source)
audio_source = src.FileAudioSource(args.source, config.sample_rate, padding, block_size)
audio_source = src.FileAudioSource(args.source, config.sample_rate, padding, config.step)
pipeline.set_timestamp_shift(-padding[0])
else:
args.output = Path("~/").expanduser() if args.output is None else Path(args.output)
Expand Down
2 changes: 1 addition & 1 deletion src/diart/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def run_single(
filepath,
pipeline.config.sample_rate,
padding,
pipeline.config.optimal_block_size(),
pipeline.config.step,
)
pipeline.set_timestamp_shift(-padding[0])
inference = StreamingInference(
Expand Down
16 changes: 8 additions & 8 deletions src/diart/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,23 @@ class FileAudioSource(AudioSource):
padding: (float, float)
Left and right padding to add to the file (in seconds).
Defaults to (0, 0).
block_size: int
Number of samples per chunk emitted.
Defaults to 1000.
block_duration: float
Duration of each emitted chunk in seconds.
Defaults to 0.5 seconds.
"""
def __init__(
self,
file: FilePath,
sample_rate: int,
padding: Tuple[float, float] = (0, 0),
block_size: int = 1000,
block_duration: float = 0.5,
):
super().__init__(Path(file).stem, sample_rate)
self.loader = AudioLoader(self.sample_rate, mono=True)
self._duration = self.loader.get_duration(file)
self.file = file
self.resolution = 1 / self.sample_rate
self.block_size = block_size
self.block_size = int(np.rint(block_duration * self.sample_rate))
self.padding_start, self.padding_end = padding
self.is_closed = False

Expand Down Expand Up @@ -132,7 +132,7 @@ class MicrophoneAudioSource(AudioSource):
Parameters
----------
block_duration: float
Duration of each chunk emitted in seconds.
Duration of each emitted chunk in seconds.
Defaults to 0.5 seconds.
device: int | str | (int, str) | None
Device identifier compatible with the sounddevice stream.
Expand Down Expand Up @@ -268,10 +268,10 @@ def __init__(
sample_rate: int,
streamer: StreamReader,
stream_index: Optional[int] = None,
block_size: int = 1000,
block_duration: float = 0.5,
):
super().__init__(uri, sample_rate)
self.block_size = block_size
self.block_size = int(np.rint(block_duration * self.sample_rate))
self._streamer = streamer
self._streamer.add_basic_audio_stream(
frames_per_chunk=self.block_size,
Expand Down

0 comments on commit c878998

Please sign in to comment.