In [1]:
from IPython.core.interactiveshell import InteractiveShell  # running this setup lets the current notebook show multiple outputs per cell
# "all": display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all" 
# NOTE(review): Axes3D is not referenced in the visible cells — confirm whether this import is still needed.
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd 
# Limit how many DataFrame rows pandas prints before truncating the display.
pd.options.display.max_rows = 8  
%matplotlib inline

In [2]:
import os
import ffmpeg
import whisper
import argparse
import warnings
import tempfile

视频的配置信息，包括原视频路径，生成字幕的语言，是否生成字幕文件等

In [3]:
# --- Input / output locations ------------------------------------------------
# Paths in this notebook are built by plain string concatenation, so `base_dir`
# has to end with a path separator.
base_dir = "video/"
file_name = "input"
# Despite the name, `input_dir` is the full path of the source video file.
input_dir: str = f"{base_dir}{file_name}.mp4"
output_dir: str = base_dir

# --- Whisper model -----------------------------------------------------------
# One of: `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`,
# `medium`, `medium.en`, `large`
model_name: str = "medium.en"

# --- Output switches ---------------------------------------------------------
# Whether to output the .srt file along with the video files.
output_srt: bool = True

# Only generate the .srt file and do not create the overlayed video.
srt_only: bool = True

# --- Spoken language ---------------------------------------------------------
# Accepted values:
# "auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs",
# "cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw",
# "he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la",
# "lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no",
# "oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv",
# "sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"
language: str = "auto"

# Extra keyword arguments forwarded to `model.transcribe(...)`; filled in later.
args = {}

In [4]:
from googletrans import Translator
from typing import Iterator, TextIO
from tqdm import tqdm
import warnings

def str2bool(string):
    """Convert the text "true" / "false" (any letter case) to a bool.

    Args:
        string: Text to interpret; it is lower-cased before the lookup.

    Returns:
        True for "true", False for "false".

    Raises:
        ValueError: If the lower-cased text is neither "true" nor "false".
    """
    str2val = {"true": True, "false": False}
    normalized = string.lower()

    # Guard clause: reject anything outside the two accepted spellings.
    if normalized not in str2val:
        raise ValueError(
            f"Expected one of {set(str2val.keys())}, got {normalized}")

    return str2val[normalized]


def format_timestamp(seconds: float, always_include_hours: bool = False,
                     decimal_marker: str = "."):
    """Render a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Non-negative offset in seconds; rounded to the nearest
            millisecond.
        always_include_hours: Emit the hours field even when it is zero.
        decimal_marker: Separator placed before the milliseconds. The default
            "." keeps the previous behaviour; SubRip (.srt) needs ",".

    Returns:
        The formatted timestamp. Hours, when present, are zero-padded to at
        least two digits.

    Raises:
        AssertionError: If ``seconds`` is negative.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    remaining_ms = round(seconds * 1000.0)

    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return (
        f"{hours_marker}{minutes:02d}:{whole_seconds:02d}"
        f"{decimal_marker}{millis:03d}"
    )


def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write transcript segments to ``file`` as SubRip (.srt) cues.

    Each segment is a dict with ``'start'`` and ``'end'`` (seconds) and
    ``'text'``. Cues are numbered from 1 and written as::

        <index>
        HH:MM:SS,mmm --> HH:MM:SS,mmm
        <text>
        <blank line>

    Args:
        transcript: Iterable of segment dicts.
        file: Writable text stream; it is flushed after every cue.
    """
    # NOTE: a googletrans-based translation of each cue (en -> zh-cn) was
    # sketched here in the original notebook but was disabled; it is not applied.
    for index, segment in enumerate(transcript, start=1):
        # SubRip timestamps use a comma before the milliseconds and always
        # carry a two-digit hours field.
        start = format_timestamp(
            segment['start'], always_include_hours=True, decimal_marker=",")
        end = format_timestamp(
            segment['end'], always_include_hours=True, decimal_marker=",")
        # "-->" is the cue-timing separator, so it must not appear in the text.
        text = segment['text'].strip().replace('-->', '->')

        # print() appends one more newline, producing the blank line that
        # separates consecutive cues.
        print(
            f"{index}\n"
            f"{start} --> {end}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )


def get_file_name(path):
    """Return the final component of ``path`` with its last extension removed."""
    base_name = os.path.basename(path)
    stem, _extension = os.path.splitext(base_name)
    return stem

def get_subtitles(audio_paths: dict, output_srt: bool, output_dir: str, transcribe: callable):
    """Transcribe each audio file and write one .srt file per source video.

    Args:
        audio_paths: Mapping of source video path -> extracted audio path.
        output_srt: Accepted for interface compatibility; it is not consulted
            here, the .srt file is always written into ``output_dir``.
        output_dir: Directory that receives the .srt files. It is created if
            missing; an empty string means the current working directory.
        transcribe: Callable taking an audio path and returning a dict whose
            ``"segments"`` entry is passed to ``write_srt``.

    Returns:
        Mapping of source video path -> path of the generated .srt file.
    """
    subtitles_path = {}

    # Create the destination up front so a missing directory is reported
    # before the (potentially very long) transcription instead of after it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Show a tqdm progress bar over the inputs.
    for path, audio_path in tqdm(audio_paths.items(), desc="Processing subtitles"):
        file_stem = get_file_name(path)
        # os.path.join works whether or not output_dir ends with a separator.
        srt_path = os.path.join(output_dir, file_stem + ".srt")

        print("file_name:", file_stem)
        print("srt_path:", srt_path)

        print(
            f"Generating subtitles for {file_stem}... This might take a while."
        )

        # Silence warnings only for the duration of the transcription; the
        # context manager restores the caller's warning filters afterwards
        # instead of overwriting them with "default".
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            result = transcribe(audio_path)

        with open(srt_path, "w", encoding="utf-8") as srt:
            write_srt(result["segments"], file=srt)

        subtitles_path[path] = srt_path

    return subtitles_path

def get_audio(path):
    """Extract the audio track of a video into a mono 16 kHz PCM WAV file.

    The WAV file is written to the relative directory ``temp_audio`` and is
    named after the video's file name without extension.

    Args:
        path: Path of the source video file.

    Returns:
        ``{path: wav_path}`` on success; an empty dict when the file does not
        exist or ffmpeg fails (the reason is printed in both cases).
    """
    audio_paths = {}
    if not os.path.isfile(path):
        print(f"File not found: {path}")
        return audio_paths

    # Create the scratch directory only once there is something to extract,
    # so a wrong input path does not leave an empty directory behind.
    temp_dir = "temp_audio"
    os.makedirs(temp_dir, exist_ok=True)

    try:
        print(f"Extracting audio from {get_file_name(path)} ({path})...")
        output_path = os.path.join(temp_dir, f"{get_file_name(path)}.wav")

        # Run ffmpeg with stdout/stderr captured so that a failure carries
        # ffmpeg's own message in the raised ffmpeg.Error.
        ffmpeg.input(path).output(
            output_path,
            acodec="pcm_s16le", ac=1, ar="16k"
        ).run(capture_stdout=True, capture_stderr=True, overwrite_output=True)

        audio_paths[path] = output_path
    except ffmpeg.Error as e:
        # Print ffmpeg error message
        print(f"Error occurred while processing {path}:")
        if e.stderr:
            # ffmpeg output is not guaranteed to be valid UTF-8 (e.g. file
            # names in a local code page); never let decoding raise from
            # inside the error handler.
            print(e.stderr.decode(errors="replace"))
        else:
            print("No stderr output available.")

    return audio_paths

生成字幕文件

In [5]:
# Decide which language, if any, to pin for transcription.
if not model_name.endswith(".en"):
    # Multilingual model: pin the language only when one was chosen explicitly;
    # with "auto" no `language` key is added to the transcribe arguments.
    if language != "auto":
        args["language"] = language
else:
    # "*.en" model names are English-only, so the configured language is ignored.
    warnings.warn(
        f"{model_name} is an English-only model, forcing English detection.")
    args["language"] = "en"

# Load the model, extract the audio track, then transcribe it into .srt files.
model = whisper.load_model(model_name)
audios = get_audio(input_dir)
subtitles = get_subtitles(
    audios,
    output_srt or srt_only,
    output_dir,
    lambda wav_path: model.transcribe(wav_path, **args),
)



Extracting audio from input (video/input.mp4)...


Processing subtitles:   0%|                                                                      | 0/1 [00:00<?, ?it/s]

file_name: input
srt_path: video/input.srt
Generating subtitles for input... This might take a while.


Processing subtitles: 100%|█████████████████████████████████████████████████████████████| 1/1 [08:08<00:00, 488.86s/it]


如果没有更新（例如手动修改）字幕文件，可以跳过下面的 CELL，直接执行再下一个 CELL。

也可以选择执行：它会把 `subtitles` 重新指向默认的 .srt 路径，该路径与上文生成字幕时使用的路径一致。

In [31]:
# Point `subtitles` at the .srt file that sits next to the input video
# (same base directory and file name, ".srt" extension).
tmp_srt_path = f"{base_dir}{file_name}.srt"

# Mapping of source video path -> subtitle file path.
subtitles = {input_dir: tmp_srt_path}

将字幕加到视频中

In [7]:
# Burn each subtitle file into its video and write the result to output_dir.
for path, srt_path in subtitles.items():
    # NOTE: the output name is fixed, so every entry writes to the same file.
    out_path = os.path.join(output_dir, "output.mp4")

    print(f"Adding subtitles to {get_file_name(path)}...")
    print("path:" + path)
    print("srt_path:" + srt_path)
    print("out_path:" + out_path)

    video = ffmpeg.input(path)
    audio = video.audio

    # Render the subtitles onto the video stream with the given ASS style
    # override (force_style), leaving the audio stream as it is.
    burned = video.filter(
        'subtitles',
        srt_path,
        force_style="OutlineColour=&H40000000,BorderStyle=3",
    )

    # Recombine one video and one audio stream, then encode.
    # overwrite_output=False: the overwrite flag is not passed to ffmpeg.
    combined = ffmpeg.concat(burned, audio, v=1, a=1)
    combined.output(out_path).run(quiet=False, overwrite_output=False)

    print(f"Saved subtitled video to {os.path.abspath(out_path)}.")

Adding subtitles to input...
path:video/input.mp4
srt_path:video/input.srt
out_path:video/output.mp4


(None, None)

Saved subtitled video to D:\sys\Anaconda3\ipython_file\video\github\video_processing\video\output.mp4.
