<a href="https://colab.research.google.com/github/kidcvs/Multimedia2SubVideo/blob/main/Audio2SubVideo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 安装必要库
!pip install git+https://github.com/openai/whisper.git
!pip install transformers
!pip install moviepy
!pip install ffmpeg-python
!apt-get install ffmpeg

# 导入库
import whisper
from transformers import MarianMTModel, MarianTokenizer
import moviepy.editor as mp
import ffmpeg
from google.colab import files
import os

# Upload the audio file from the local machine.
uploaded = files.upload()

# An empty result means nothing was uploaded (e.g. the dialog was cancelled);
# fail with a clear message instead of a bare StopIteration.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose an audio file.")

# Use the name of the first uploaded file.
audio_filename = next(iter(uploaded))

# Load the "base" Whisper checkpoint.
model = whisper.load_model("base")

# Transcribe the uploaded audio file.
result = model.transcribe(audio_filename)

# Full transcript as a single string.
english_subtitles = result["text"]
# Per-segment entries; later cells read their 'text', 'start' and 'end' keys.
segments = result["segments"]

# Collect the source text of every transcribed segment.
src_text = [segment['text'] for segment in segments]

# Load the English -> Chinese Marian translation model and its tokenizer.
model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
translation_model = MarianMTModel.from_pretrained(model_name)

# Translate the segments one at a time, keeping the original order.
translated_subtitles = []
for sentence in src_text:
    encoded = tokenizer(sentence, return_tensors="pt", padding=True)
    generated = translation_model.generate(**encoded)
    # One input sentence yields one output sequence; decode it to plain text.
    translated_subtitles.append(
        tokenizer.decode(generated[0], skip_special_tokens=True)
    )

# Attach each translation to its segment so the text travels with the
# segment's start/end timestamps.
for segment, translation in zip(segments, translated_subtitles):
    segment['translated_text'] = translation

# Build the video track.
# Load the audio first: its duration determines how long the video must be.
# (The Whisper transcription result has no 'duration' key, so it cannot be
# used for this.)
audio_clip = mp.AudioFileClip(audio_filename)
# Use a static solid-black 1280x720 background for the full audio duration.
video_clip = mp.ColorClip(size=(1280, 720), color=(0, 0, 0), duration=audio_clip.duration)
# Attach the audio to the background clip.
video_clip = video_clip.set_audio(audio_clip)

# Build the MoviePy subtitle clips and overlay them on the video.
from moviepy.editor import TextClip, CompositeVideoClip

# NOTE(review): no font is specified, so TextClip uses its default font;
# confirm that font can render the translated Chinese glyphs.
subtitles = []
for segment in segments:
    start = segment['start']
    # White 24pt text, centered at the bottom, shown for the segment's span.
    txt_clip = TextClip(segment['translated_text'], fontsize=24, color='white')
    txt_clip = txt_clip.set_position(('center', 'bottom'))
    txt_clip = txt_clip.set_start(start)
    txt_clip = txt_clip.set_duration(segment['end'] - start)
    subtitles.append(txt_clip)

# Background video first, subtitle clips layered on top.
final_video = CompositeVideoClip([video_clip] + subtitles)

# Render the composited video to an MP4 file at 24 frames per second.
output_filename = "output_video.mp4"
final_video.write_videofile(output_filename, fps=24)

# Download the rendered file through the browser.
files.download(output_filename)
