mli · mli · Dec 14, 2022 · Nov 29, 2022 · Nov 29, 2022 · Nov 30, 2022
diff --git a/autocut/daemon.py b/autocut/daemon.py
@@ -42,8 +42,9 @@ def _iter(self):
                 if utils.add_cut(md_fn) in files:
                     continue
                 md = utils.MD(md_fn, self.args.encoding)
-                if not md.done_editing() or \
-                    os.path.exists(utils.change_ext(utils.add_cut(f), "mp4")):
+                if not md.done_editing() or os.path.exists(
+                    utils.change_ext(utils.add_cut(f), "mp4")
+                ):
                     continue
                 args.inputs = [f, md_fn, srt_fn]
                 cut.Cutter(args).run()

diff --git a/autocut/main.py b/autocut/main.py
@@ -70,9 +70,7 @@ def main():
         help="The bitrate to export the cutted video, such as 10m, 1m, or 500k",
     )
     parser.add_argument(
-        "--vad", help="If or not use VAD",
-        choices=["1", "0", "auto"],
-        default="auto"
+        "--vad", help="If or not use VAD", choices=["1", "0", "auto"], default="auto"
     )
     parser.add_argument(
         "--force",

diff --git a/autocut/transcribe.py b/autocut/transcribe.py
@@ -11,6 +11,17 @@
 from . import utils
 
 
+def process(whisper_model, audio, seg, lang, prompt):  # top-level helper handed to pool.apply_async in _transcribe
+    r = whisper_model.transcribe(
+        audio[int(seg["start"]) : int(seg["end"])],  # only this segment's slice of audio; bounds cast to int
+        task="transcribe",
+        language=lang,
+        initial_prompt=prompt,
+    )
+    r["origin_timestamp"] = seg  # attach the source segment to the result before returning it
+    return r
+
+
 class Transcribe:
     def __init__(self, args):
         self.args = args
@@ -27,8 +38,11 @@ def run(self):
                 continue
 
             audio = whisper.load_audio(input, sr=self.sampling_rate)
-            if (self.args.vad == "1" or
-                self.args.vad == "auto" and not name.endswith("_cut")):
+            if (
+                self.args.vad == "1"
+                or self.args.vad == "auto"
+                and not name.endswith("_cut")
+            ):
                 speech_timestamps = self._detect_voice_activity(audio)
             else:
                 speech_timestamps = [{"start": 0, "end": len(audio)}]
@@ -78,18 +92,40 @@ def _transcribe(self, audio, speech_timestamps):
             )
 
         res = []
-        # TODO, a better way is merging these segments into a single one, so whisper can get more context
-        for seg in speech_timestamps:
-            r = self.whisper_model.transcribe(
-                audio[int(seg["start"]) : int(seg["end"])],
-                task="transcribe",
-                language=self.args.lang,
-                initial_prompt=self.args.prompt,
-            )
-            r["origin_timestamp"] = seg
-            res.append(r)
-        logging.info(f"Done transcription in {time.time() - tic:.1f} sec")
-        return res
+        if self.args.device == "cpu":
+            from multiprocessing import Pool
+
+            pool = Pool(processes=4)
+            # TODO: a better way is to merge these segments into a single one, so whisper can get more context
+            for seg in speech_timestamps:
+                res.append(
+                    pool.apply_async(
+                        process,
+                        (
+                            self.whisper_model,
+                            audio,
+                            seg,
+                            self.args.lang,
+                            self.args.prompt,
+                        ),
+                    )
+                )
+            pool.close()
+            pool.join()
+            logging.info(f"Done transcription in {time.time() - tic:.1f} sec")
+            return [i.get() for i in res]
+        else:
+            for seg in speech_timestamps:
+                r = self.whisper_model.transcribe(
+                    audio[int(seg["start"]) : int(seg["end"])],
+                    task="transcribe",
+                    language=self.args.lang,
+                    initial_prompt=self.args.prompt,
+                )
+                r["origin_timestamp"] = seg
+                res.append(r)
+            logging.info(f"Done transcription in {time.time() - tic:.1f} sec")
+            return res
 
     def _save_srt(self, output, transcribe_results):
         subs = []