
Minor features and improvements #64

Merged 8 commits on Jun 13, 2022
15 changes: 11 additions & 4 deletions README.md
@@ -27,17 +27,24 @@ conda create -n diart python=3.8
conda activate diart
```

-2) [Install PyTorch](https://pytorch.org/get-started/locally/#start-locally)
+2) Install `PortAudio` and `soundfile`:

-3) Install pyannote.audio 2.0 (currently in development)
+```shell
+conda install portaudio
+conda install pysoundfile -c conda-forge
+```
+
+3) [Install PyTorch](https://pytorch.org/get-started/locally/#start-locally)
+
+4) Install pyannote.audio 2.0 (currently in development)

```shell
pip install git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
```

**Note:** starting with version 0.4, installing pyannote.audio is mandatory to run the default system or to use pyannote-based models. Otherwise, this step can be skipped.

-4) Install diart:
+5) Install diart:
```shell
pip install diart
```
@@ -102,7 +109,7 @@ embedding = OverlapAwareSpeakerEmbedding(emb_model)
mic = MicrophoneAudioSource(seg_model.get_sample_rate())

# Reformat microphone stream. Defaults to 5s duration and 500ms shift
-regular_stream = mic.stream.pipe(dops.regularize_stream(seg_model.get_sample_rate()))
+regular_stream = mic.stream.pipe(dops.regularize_audio_stream(seg_model.get_sample_rate()))
# Branch the microphone stream to calculate segmentation
segmentation_stream = regular_stream.pipe(ops.map(segmentation))
# Join audio and segmentation stream to calculate speaker embeddings
2 changes: 1 addition & 1 deletion src/diart/audio.py
@@ -98,4 +98,4 @@ def get_num_sliding_chunks(self, filepath: FilePath, chunk_duration: float, step
            Duration of the step between chunks in seconds.
        """
        numerator = self.get_duration(filepath) - chunk_duration + step_duration
-        return int(np.ceil(numerator / step_duration))
\ No newline at end of file
+        return int(np.ceil(numerator / step_duration))
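The return statement above computes `ceil((duration - chunk_duration + step_duration) / step_duration)`. A standalone sketch of the same arithmetic (function name is illustrative, not diart's API):

```python
import math

def num_sliding_chunks(duration: float, chunk_duration: float, step_duration: float) -> int:
    # Number of sliding windows of length chunk_duration, advancing by
    # step_duration, that fit in an audio file of the given duration.
    numerator = duration - chunk_duration + step_duration
    return int(math.ceil(numerator / step_duration))

# A 10s file scanned with 5s chunks every 0.5s yields 11 chunks
print(num_sliding_chunks(10.0, 5.0, 0.5))  # 11
```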
24 changes: 4 additions & 20 deletions src/diart/benchmark.py
@@ -1,13 +1,10 @@
import argparse

-import torch
-
import diart.argdoc as argdoc
from diart.inference import Benchmark
from diart.pipelines import OnlineSpeakerDiarization, PipelineConfig

if __name__ == "__main__":
-    # Define script arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("root", type=str, help="Directory with audio files CONVERSATION.(wav|flac|m4a|...)")
    parser.add_argument("--reference", type=str, help="Optional. Directory with RTTM files CONVERSATION.rttm. Names must match audio files")
@@ -24,20 +21,7 @@
    parser.add_argument("--output", type=str, help=f"{argdoc.OUTPUT}. Defaults to `root`")
    args = parser.parse_args()

-    # Set benchmark configuration
-    benchmark = Benchmark(args.root, args.reference, args.output)
-
-    # Define online speaker diarization pipeline
-    pipeline = OnlineSpeakerDiarization(PipelineConfig(
-        step=args.step,
-        latency=args.latency,
-        tau_active=args.tau,
-        rho_update=args.rho,
-        delta_new=args.delta,
-        gamma=args.gamma,
-        beta=args.beta,
-        max_speakers=args.max_speakers,
-        device=torch.device("cpu") if args.cpu else None,
-    ))
-
-    benchmark(pipeline, args.batch_size)
+    Benchmark(args.root, args.reference, args.output)(
+        OnlineSpeakerDiarization(PipelineConfig.from_namespace(args), profile=True),
+        args.batch_size
+    )
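`PipelineConfig.from_namespace` is introduced by this PR to replace the long manual keyword list. A simplified sketch of the classmethod-from-argparse pattern it follows (the class and field names here are illustrative stand-ins, not diart's actual signature):

```python
import argparse


class PipelineConfig:
    # Simplified illustration; the real diart class takes many more
    # hyper-parameters (tau_active, rho_update, delta_new, ...).
    def __init__(self, step: float = 0.5, latency: float = 0.5):
        self.step = step
        self.latency = latency

    @classmethod
    def from_namespace(cls, args: argparse.Namespace) -> "PipelineConfig":
        # Build the config directly from parsed CLI arguments, so the
        # caller no longer copies each flag into a keyword argument.
        return cls(step=args.step, latency=args.latency)


parser = argparse.ArgumentParser()
parser.add_argument("--step", type=float, default=0.5)
parser.add_argument("--latency", type=float, default=0.5)
config = PipelineConfig.from_namespace(parser.parse_args(["--step", "0.25"]))
print(config.step, config.latency)  # 0.25 0.5
```

Centralizing the argument-to-config mapping in one classmethod keeps the CLI script short and makes new hyper-parameters a one-place change.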