Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add --sub-cn-inline-limit and --sub-cn-modal-words by jionlp. #14

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions autocut/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ def main():
parser.add_argument('--device', type=str, default=None,
choices=['cpu', 'cuda'],
help='Force to CPU or GPU for trascribing. In default automatically use GPU if available.')
parser.add_argument('--sub-cn-inline-limit', type=int, default=16, # set 0 to disable
help='Optimize the display of long sentences in subtitle for Chinese')
parser.add_argument('--sub-cn-modal-words', type=str, default="啊,吧", # use English comma to separate
help='To filter the modal words in sentences for Chinese')

args = parser.parse_args()

Expand Down
9 changes: 6 additions & 3 deletions autocut/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def run(self):

audio = whisper.load_audio(input, sr=self.sampling_rate)
speech_timestamps = self._detect_voice_activity(audio)
transcribe_results = self._transcibe(audio, speech_timestamps)
transcribe_results = self._transcribe(audio, speech_timestamps)

output = name + '.srt'
self._save_srt(output, transcribe_results)
Expand Down Expand Up @@ -65,7 +65,7 @@ def _detect_voice_activity(self, audio):
logging.info(f'Done voice activity detetion in {time.time() - tic:.1f} sec')
return speeches

def _transcibe(self, audio, speech_timestamps):
def _transcribe(self, audio, speech_timestamps):
tic = time.time()
if self.whisper_model is None:
self.whisper_model = whisper.load_model(self.args.whisper_model, self.args.device)
Expand Down Expand Up @@ -106,7 +106,10 @@ def _add_sub(start, end, text):
_add_sub(start, end, s["text"])
prev_end = end

with open(output, 'wb') as f:
from .transcribe_middleware import TranscribeMiddleware
TranscribeMiddleware(self.args, subs).run()

with open(output, mode='wb') as f:
f.write(srt.compose(subs).encode(self.args.encoding, 'replace'))

def _save_md(self, md_fn, srt_fn, video_fn):
Expand Down
102 changes: 102 additions & 0 deletions autocut/transcribe_middleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import srt


class TranscribeMiddleware:
    """Post-process Whisper subtitle output for Chinese ("zh") transcripts.

    Runs up to two optional passes over an existing list of srt.Subtitle
    objects:

      * splitting over-long subtitle lines into shorter ones
        (``--sub-cn-inline-limit``), and
      * stripping configured modal/filler words (``--sub-cn-modal-words``).

    The ``subs`` list is mutated in place, so callers that keep a reference
    to the same list object observe the result.
    """

    def __init__(self, args, subs: list[srt.Subtitle]) -> None:
        # Parsed CLI namespace; must expose `lang`, `sub_cn_inline_limit`
        # and `sub_cn_modal_words` (defined in autocut/main.py).
        self.args = args
        # Shared subtitle list — modified in place by run().
        self.subs = subs
        # Target max characters per subtitle line; 0 disables splitting.
        self.SINGLE_SUB_CN_MAX_LEN = self.args.sub_cn_inline_limit
        # Modal words separated by the English comma, e.g. "啊,吧".
        self.MODAL_WORDS_CN = self.args.sub_cn_modal_words.strip()

    def run(self) -> None:
        """Apply the enabled passes; no-op unless --lang is "zh"."""
        if self.args.lang == "zh":
            if self.args.sub_cn_inline_limit > 0:
                self._sub_split_CN()

            if len(self.args.sub_cn_modal_words.strip()) > 0:
                self._sub_filter_modal_CN()

    def _sub_split_CN(self) -> None:
        """Split subtitles longer than the configured limit into shorter ones.

        Each subtitle's text is segmented into short clauses with jionlp,
        then clauses are re-grouped under a character budget.  New timestamps
        are interpolated proportionally to character counts, i.e. this
        assumes roughly uniform speech speed within one subtitle.  Replaces
        the contents of ``self.subs`` in place.
        """
        import datetime
        import jionlp as jio

        new_subs = []

        for sub in self.subs:
            duration = (sub.end - sub.start).total_seconds()

            # Whisper's zh output sometimes contains English commas; normalize
            # them to the full-width Chinese comma before sentence splitting.
            sub_content_temp = sub.content.strip().replace(",", ",")
            # jionlp (https://github.com/dongrixinyu/JioNLP) fine-grained
            # Chinese sentence/clause splitting.
            sub_split_list = jio.split_sentence(sub_content_temp, criterion='fine')
            sub_len = len(sub_content_temp)
            # NOTE(review): if sub.content strips to "" then sub_len == 0 and
            # the proportional-time update below divides by zero — assumes
            # upstream never emits empty subtitle text; confirm.

            # Sliding window over the clause list: accumulate clauses until the
            # character budget is exceeded, then emit one new subtitle whose
            # time window was advanced proportionally below.
            interval_start = sub.start.total_seconds()
            interval_end = sub.start.total_seconds()
            interval_len = 0
            start_index = 0

            def _add_sub(target_index):
                # Emit clauses [start_index, target_index) as one subtitle for
                # the current interpolated time window.  index=0 is fine here:
                # srt.compose() renumbers subtitles on output.
                new_subs.append(srt.Subtitle(index=0,
                                             start=datetime.timedelta(seconds=interval_start),
                                             end=datetime.timedelta(seconds=interval_end),
                                             content="".join(sub_split_list[start_index:target_index])))

            for index, sub_split_item in enumerate(sub_split_list):
                sub_split = sub_split_item.strip()

                # Flush early when adding this clause would overshoot the
                # budget by more than 50% (limit + limit // 2).
                if index > 0 and interval_len + len(sub_split) > self.SINGLE_SUB_CN_MAX_LEN + self.SINGLE_SUB_CN_MAX_LEN // 2:
                    _add_sub(index)
                    interval_start = interval_end
                    start_index = index
                    interval_len = 0

                interval_len = interval_len + len(sub_split)
                # Advance the window end by this clause's share of the original
                # subtitle's characters, scaled to the subtitle's duration.
                interval_end = interval_end + (len(sub_split) / sub_len) * duration

                if interval_len < self.SINGLE_SUB_CN_MAX_LEN + 1:
                    continue

                # Budget reached (length > limit): flush up to and including
                # the current clause.
                _add_sub(index + 1)
                interval_start = interval_end
                start_index = index + 1
                interval_len = 0

            # Flush trailing clauses that never filled the budget.
            if interval_len != 0:
                new_subs.append(srt.Subtitle(index=0,
                                             start=datetime.timedelta(seconds=interval_start),
                                             end=datetime.timedelta(seconds=interval_end),
                                             content="".join(sub_split_list[start_index:])))

        # Replace contents in place so callers holding the same list object
        # see the split result.
        self.subs.clear()
        self.subs.extend(new_subs)

    def _sub_filter_modal_CN(self) -> None:
        """Remove configured modal/filler words from the end of each clause.

        A clause's final character (before optional trailing punctuation) is
        dropped when it matches one of the configured modal words; matching is
        therefore effectively single-character.  ``sub.content`` is rewritten
        in place only when at least one clause matched.
        """
        import jionlp as jio
        import re

        key_list = [key.strip() for key in self.MODAL_WORDS_CN.split(",")]
        for sub in self.subs:
            # Split into short clauses (same comma normalization and jionlp
            # call as _sub_split_CN).
            sub_split_list = jio.split_sentence(sub.content.strip().replace(",", ","), criterion='fine')

            trigger = False
            new_sub_split_list = []
            for sub_split_item in sub_split_list:
                sub_split = sub_split_item.strip()
                # If the clause ends with a CJK/alphanumeric character, the
                # modal-word candidate is the last char (-1); otherwise the
                # clause ends in punctuation and the candidate is the char
                # before it (-2).
                # NOTE(review): a clause that strips to "" (index -1) or a
                # 1-char punctuation-final clause (index -2) would raise
                # IndexError — assumes jionlp never yields such segments;
                # confirm.
                last_word_index = -1 if re.match(r"^[\u4E00-\u9FA5A-Za-z0-9_]+$", sub_split[-1]) else -2

                if sub_split[last_word_index] in key_list:
                    trigger = True
                    temp = sub_split[:last_word_index]
                    if last_word_index == -2:
                        # Keep the trailing punctuation that followed the word.
                        temp += sub_split[-1]
                    new_sub_split_list.append(temp)
                    continue

                new_sub_split_list.append(sub_split)

            # Only rewrite when something was actually removed.
            if trigger:
                sub.content = "".join(new_sub_split_list)
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from setuptools import setup, find_packages
import os

from setuptools import setup, find_packages

requirements = [
'srt',
'moviepy',
'opencc-python-reimplemented',
'whisper @ git+https://github.com/openai/whisper.git'
'whisper @ git+https://github.com/openai/whisper.git',
'jionlp',
]

init_fn = os.path.join(os.path.dirname(__file__), 'autocut', '__init__.py')
Expand Down