In [None]:
!pip install srt
!pip install -U langchain langchain-openai

In [None]:
import os
import srt
import asyncio
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from srt import compose
import nest_asyncio
import subprocess

# Patch asyncio so that asyncio.run() can be used while an event loop is
# already running (this file is a notebook export).
nest_asyncio.apply()

# Constants
# The API key is read from the OPEN_AI_KEY environment variable; it is None
# when that variable is not set.
OPENAI_API_KEY = os.getenv('OPEN_AI_KEY')
MODEL_NAME = "gpt-4o-mini"
TEMPERATURE = 0.6
MEMORY_SIZE = 5        # window size (k) for ConversationBufferWindowMemory
BATCH_SIZE = 30        # number of subtitles translated concurrently
MAX_LINE_LENGTH = 20   # target maximum characters per subtitle line
MAX_LINES = 2

# LLM initialisation
# os.environ only accepts string values, so assigning None (OPEN_AI_KEY unset)
# would raise TypeError before the model is even created. Only export the key
# when one was provided; otherwise any OPENAI_API_KEY already present in the
# environment is left untouched.
if OPENAI_API_KEY is not None:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
llm = ChatOpenAI(model_name=MODEL_NAME, temperature=TEMPERATURE)

# Prompt template for the first-pass translation.
# Placeholders: {context} (video summary), {prev_subtitle}, {text} (the
# subtitle to translate) and {next_subtitle}.
template = """あなたはプロの翻訳者であり、字幕翻訳の専門家です。以下の点を考慮して、自然で正確な日本語に翻訳してください。
* 口語的な表現を適切に使用する
* 文脈に合わせた適切な語彙を選択する
* 翻訳後の字幕は、視聴者が読みやすいように簡潔で自然な表現にする

コンテキスト: {context}
前の字幕: {prev_subtitle}
現在の字幕: {text}
次の字幕: {next_subtitle}

翻訳のみを返してください。
"""
prompt = PromptTemplate(
    input_variables=["context", "prev_subtitle", "text", "next_subtitle"],
    template=template
)

# Initialise ConversationBufferWindowMemory.
# NOTE(review): `memory` is not passed to either chain below, so it has no
# effect in the code visible here.
memory = ConversationBufferWindowMemory(k=MEMORY_SIZE)

# Define the translation chain.
chain = LLMChain(llm=llm, prompt=prompt)

# Prompt for the second-stage refinement pass.
# Placeholders: {original} (source text) and {initial_translation}
# (output of the first pass).
refinement_template = """
以下の翻訳を見直し、より自然で正確な日本語に調整してください。
原文: {original}
初期翻訳: {initial_translation}

改善された翻訳のみを返してください。
"""
refinement_prompt = PromptTemplate(
    input_variables=["original", "initial_translation"],
    template=refinement_template
)
refinement_chain = LLMChain(llm=llm, prompt=refinement_prompt)

def get_srt_files():
    """Return the names of the .srt files in the current directory.

    Only regular files are returned, so a directory whose name happens to
    end in ``.srt`` is skipped. The names are sorted because ``os.listdir``
    returns entries in arbitrary order, which would otherwise make any
    numbered listing built from this result unstable between runs.

    Returns:
        A sorted list of file names (not full paths) ending in ``.srt``.
    """
    return sorted(
        name
        for name in os.listdir('.')
        if name.endswith('.srt') and os.path.isfile(name)
    )

def select_srt_file():
    """Ask the user to pick one of the .srt files in the current directory.

    The available files are printed as a 1-based numbered menu and the
    prompt is repeated until a valid number is entered.

    Returns:
        The selected file name, or None when no .srt file is available.
    """
    candidates = get_srt_files()
    if not candidates:
        print("No .srt files found in the current directory.")
        return None

    print("Available .srt files:")
    for number, name in enumerate(candidates, 1):
        print(f"{number}. {name}")

    # Keep prompting until the answer is an in-range menu number.
    while True:
        try:
            picked = int(input("Enter the number of the file you want to use: "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if picked < 1 or picked > len(candidates):
            print("Invalid choice. Please try again.")
            continue
        return candidates[picked - 1]

def read_srt(input_srt_file):
    """Read an SRT file and return its subtitles as a list.

    The file is decoded with ``utf-8-sig`` so that a leading UTF-8
    byte-order mark is dropped instead of being left as a stray U+FEFF
    character in front of the first subtitle index. Files without a BOM
    decode exactly as with plain UTF-8. Bytes that cannot be decoded are
    skipped (``errors='ignore'``).

    Args:
        input_srt_file: Path of the SRT file to read.

    Returns:
        The list of subtitles produced by ``srt.parse``. If reading or
        parsing fails, the error is printed and an empty list is returned.
    """
    try:
        with open(input_srt_file, 'r', encoding='utf-8-sig', errors='ignore') as file:
            srt_data = file.read()
        return list(srt.parse(srt_data))
    except Exception as e:
        # Best effort: callers treat an empty list as "nothing to translate".
        print(f"Error reading SRT file: {e}")
        return []

async def translate_subtitles(subtitles, context):
    """Translate subtitles in batches, then refine every translation.

    Subtitles are processed ``BATCH_SIZE`` at a time. Within a batch, all
    first-pass translations (``chain.arun``) run concurrently, followed by
    all refinement requests (``refinement_chain.arun``). Progress is printed
    after each subtitle is updated.

    Args:
        subtitles: List of subtitle objects with a ``content`` attribute.
            The objects are modified in place.
        context: Free-text description of the video, passed to the
            translation prompt.

    Returns:
        The same ``subtitles`` list, with each ``content`` replaced by its
        refined translation.
    """
    # Snapshot the source text up front. Subtitles are overwritten in place
    # below, so reading neighbours from `subtitles` would hand the prompt an
    # already-translated "previous subtitle" at every batch boundary while
    # all other positions receive source-language text.
    source_texts = [subtitle.content for subtitle in subtitles]
    total = len(subtitles)

    for start in range(0, total, BATCH_SIZE):
        batch_texts = source_texts[start:start + BATCH_SIZE]

        # First pass: translate each subtitle with its neighbours as context.
        first_pass = await asyncio.gather(*(
            chain.arun(
                context=context,
                prev_subtitle=source_texts[pos - 1] if pos > 0 else "",
                text=text,
                next_subtitle=source_texts[pos + 1] if pos < total - 1 else "",
            )
            for pos, text in enumerate(batch_texts, start=start)
        ))

        # Second pass: refine each draft against its source text.
        refined = await asyncio.gather(*(
            refinement_chain.arun(original=text, initial_translation=draft)
            for text, draft in zip(batch_texts, first_pass)
        ))

        for pos, translation in enumerate(refined, start=start):
            subtitles[pos].content = translation
            progress = (pos + 1) / total * 100
            print(f"Translation progress: {progress:.2f}%")

    return subtitles

def write_srt(output_srt_file, subtitles):
    """Serialise subtitles to SRT text and save them as a UTF-8 file.

    Args:
        output_srt_file: Path of the file to write.
        subtitles: Subtitle objects accepted by ``compose``.

    Any error raised while composing or writing is printed, not re-raised.
    """
    try:
        # Compose before opening so a compose failure leaves the file untouched.
        serialized = compose(subtitles)
        with open(output_srt_file, "w", encoding="utf-8") as out:
            out.write(serialized)
    except Exception as err:
        print(f"Error writing SRT file: {err}")

def add_line_breaks(text, *, max_line_length=None):
    """Insert line breaks into Japanese text at punctuation boundaries.

    The text is first cut into segments: after every "。", and after a "、"
    once the running segment is longer than the limit. Consecutive segments
    are then packed onto one line as long as the combined length stays
    within the limit. Text is only ever broken at those punctuation marks,
    so a segment longer than the limit is kept on a single line.

    Args:
        text: The text to wrap.
        max_line_length: Maximum characters per line. Defaults to the
            module-level ``MAX_LINE_LENGTH`` when omitted.

    Returns:
        The text with "\\n" inserted between lines; "" for empty input.
    """
    limit = MAX_LINE_LENGTH if max_line_length is None else max_line_length

    # Pass 1: split into segments at the punctuation marks.
    segments = []
    current = ""
    for char in text:
        current += char
        if char == "。" or (char == "、" and len(current) > limit):
            segments.append(current)
            current = ""
    if current:
        segments.append(current)

    # Pass 2: pack segments into lines no longer than the limit.
    wrapped = []
    current = ""
    for segment in segments:
        if len(current) + len(segment) <= limit:
            current += segment
        else:
            # Skip the still-empty accumulator; appending it would put a
            # blank line in front of an over-long first segment.
            if current:
                wrapped.append(current)
            current = segment
    if current:
        wrapped.append(current)

    return "\n".join(wrapped)

def add_line_breaks_to_srt(srt_text):
    """Apply ``add_line_breaks`` to the text lines of an SRT document.

    Lines that consist only of digits or that contain "-->" are treated as
    index/timestamp lines and copied unchanged (a caption line made up
    solely of digits is therefore also left as is). Every other line,
    including the blank separators, goes through ``add_line_breaks``.

    Args:
        srt_text: Complete SRT file content.

    Returns:
        The SRT content with wrapped caption lines, stripped of leading and
        trailing whitespace.
    """
    processed = []
    for line in srt_text.split("\n"):
        if line.strip().isdigit() or "-->" in line:
            processed.append(line)
        else:
            processed.append(add_line_breaks(line))

    # Re-join with exactly one newline per original line break. This keeps
    # the lines of a multi-line cue contiguous (a blank line inside a cue
    # would end it) and preserves the blank separator after a cue whose last
    # line is digits only.
    return "\n".join(processed).strip()

async def process_srt_file(input_srt_file, output_srt_file):
    """Translate an SRT file and write the wrapped result to disk.

    Asks the user for a short description of the video, translates the
    subtitles read from ``input_srt_file``, writes them to
    ``output_srt_file``, then re-reads that file, applies line wrapping and
    saves the final text as ``translated_file_result.srt``.

    Args:
        input_srt_file: Path of the source SRT file.
        output_srt_file: Path for the translated (not yet wrapped) SRT file.

    Returns:
        None. Returns early if no subtitles could be read.
    """
    final_path = "translated_file_result.srt"

    video_summary = input("ビデオの内容や概要を簡単に説明してください: ")
    parsed = read_srt(input_srt_file)
    if not parsed:
        print("No subtitles to translate.")
        return

    translated = await translate_subtitles(parsed, video_summary)
    write_srt(output_srt_file, translated)

    # Wrapping works on the composed SRT text, so read back what was written.
    with open(output_srt_file, 'r', encoding='utf-8') as source:
        composed_text = source.read()
    wrapped_text = add_line_breaks_to_srt(composed_text)

    with open(final_path, "w", encoding='utf-8') as target:
        target.write(wrapped_text)

    print(f"Translation completed: {final_path}")

def create_video_with_subtitles(video_file, subtitle_file, output_video_file):
    """Burn subtitles into a video by running ffmpeg.

    ffmpeg's stderr is read line by line and every line containing
    "frame=" is printed. A non-zero exit code is reported with a message;
    no exception is raised for it.

    Args:
        video_file: Path of the input video.
        subtitle_file: Path of the subtitle file given to the ``subtitles``
            filter.
        output_video_file: Path of the video to create.
    """
    subtitle_filter = f"subtitles={subtitle_file}:force_style='FontName=Helvetica,FontSize=11'"
    command = [
        'ffmpeg',
        '-hwaccel', 'auto',
        '-i', f"{video_file}",
        '-vf', subtitle_filter,
        # NOTE(review): h264_amf needs an ffmpeg build and hardware that
        # provide this encoder — confirm for the target machines.
        '-c:v', 'h264_amf',
        '-c:a', 'copy',
        # Progress reporting. NOTE(review): the original comment said this is
        # written to stderr; '-' may actually denote stdout — confirm against
        # the ffmpeg documentation.
        '-progress', '-',
        f'{output_video_file}',
    ]
    process = subprocess.Popen(command, stderr=subprocess.PIPE, universal_newlines=True)

    # Relay ffmpeg's status lines until stderr reaches end-of-file.
    for status_line in process.stderr:
        if 'frame=' in status_line:
            print(status_line.strip())

    # Make sure the process has exited so returncode is populated.
    process.wait()

    if process.returncode != 0:
        print(f"Error creating video: {process.returncode}")

# Entry point: pick an SRT file, translate it, then burn the translated
# subtitles into the video that shares the SRT file's base name.
if __name__ == "__main__":
    input_srt = select_srt_file()
    if input_srt:
        output_srt = 'translated_' + input_srt

        # Derive the video names from the base name by removing only the
        # trailing ".srt". str.replace() would rewrite every occurrence of
        # ".srt"/".mp4" in the name (e.g. "clip.mp4.srt").
        srt_suffix = ".srt"
        if input_srt.endswith(srt_suffix):
            base_name = input_srt[:-len(srt_suffix)]
        else:
            base_name = input_srt
        video = base_name + ".mp4"
        output_video = base_name + "_jp.mp4"

        asyncio.run(process_srt_file(input_srt, output_srt))

        # process_srt_file always writes its final output to this fixed name.
        create_video_with_subtitles(video, "translated_file_result.srt", output_video)
    else:
        print("No file selected. Exiting.")