In [3]:
import math
import numbers
import os

import pandas as pd

# Load the CSV file
file_path = r"D:\SOKM\11 Identity 2 SoKM 2024 - 2025\11 Identity 2 SoKM 2024 - 2025_audio_english_v3_translated_d_openai_gpt_4_1_pv1.1.csv"

# Try the candidate encodings from strictest to most permissive.
# 'latin-1' maps every possible byte, so it can never raise UnicodeDecodeError
# and must come last; anything listed after it would be unreachable.
# (The previous last-resort call passed `errors='ignore'`, which is not a
# read_csv keyword - pandas spells it `encoding_errors` - but it could never
# run because the 'latin-1' attempt always succeeded first.)
for csv_encoding in ('utf-8', 'cp1252', 'latin-1'):
    try:
        df = pd.read_csv(file_path, encoding=csv_encoding)
    except UnicodeDecodeError:
        continue
    if csv_encoding != 'utf-8':
        # Make the fallback visible: a wrong guess shows up as garbled accents/quotes.
        print(f"Warning: file is not valid UTF-8; decoded as {csv_encoding}.")
    break

# Column names of the CSV that feed the SRT generation below
start_time_col = 'Segment Start'
end_time_col = 'Segment End'
# Alternative text column: 'Translation_nativespeaker'
subtitle_col = 'Translated Text'

def convert_to_srt_time(time_in_seconds):
    """Convert an offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        time_in_seconds: Offset in seconds as any real number (Python or
            NumPy integer/float).

    Returns:
        str: The zero-padded timestamp. Non-numeric, NaN and infinite inputs
        yield ``"00:00:00,000"``; negative offsets are clamped to zero.
    """
    # numbers.Real also matches NumPy integer scalars, which are not `int` instances.
    if not isinstance(time_in_seconds, numbers.Real) or not math.isfinite(time_in_seconds):
        return "00:00:00,000"

    # Round once on the total millisecond count so the carry propagates into
    # seconds/minutes/hours (rounding only the fractional part could give 1000 ms).
    total_ms = max(0, round(float(time_in_seconds) * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"

# Create the SRT content: one numbered cue per row that has usable text
srt_cues = []
subtitle_index = 1
for _, row in df.iterrows():
    subtitle_text = row[subtitle_col]

    # Skip rows where subtitle text is empty or not a string (e.g. NaN)
    if not isinstance(subtitle_text, str) or not subtitle_text.strip():
        continue

    # A blank line terminates an SRT cue, so blank lines inside the text would
    # cut the cue short; keep only the non-empty lines.
    cue_text = "\n".join(
        line.strip() for line in subtitle_text.splitlines() if line.strip()
    )

    start_time = convert_to_srt_time(row[start_time_col])
    end_time = convert_to_srt_time(row[end_time_col])

    srt_cues.append(f"{subtitle_index}\n{start_time} --> {end_time}\n{cue_text}\n\n")
    subtitle_index += 1

srt_content = "".join(srt_cues)

# Output sits next to the input: same name, extension swapped for '_subtitles.srt'.
# splitext only touches the final extension, so the result can never equal
# file_path (str.replace left the path unchanged for e.g. '.CSV', which made the
# write below overwrite the source CSV).
srt_file_name = os.path.splitext(file_path)[0] + '_subtitles.srt'

# Write the content to the .srt file (UTF-8 with BOM)
with open(srt_file_name, 'w', encoding='utf-8-sig') as f:
    f.write(srt_content)

print(f"SRT file '{srt_file_name}' has been created successfully.")

SRT file 'D:\SOKM\11 Identity 2 SoKM 2024 - 2025\11 Identity 2 SoKM 2024 - 2025_audio_english_v3_translated_d_openai_gpt_4_1_pv1.1_subtitles.srt' has been created successfully.


Burn the subtitles into the video

In [14]:
import subprocess, os, shlex

def burn_subtitles(video_in: str, srt_in: str, video_out: str,
                   style: str | None = None) -> None:
    if not os.path.exists(video_in):
        print(f"❌  Video not found: {video_in}"); return
    if not os.path.exists(srt_in):
        print(f"❌  Subtitle file not found: {srt_in}"); return

    # 1️⃣ normalise to forward-slashes, 2️⃣ escape the drive-letter colon
    sub_path = srt_in.replace("\\", "/").replace(":", r"\:")

    vf = f"subtitles=file='{sub_path}'"
    if style:
        vf += f":force_style='{style}'"

    cmd = [
        "ffmpeg",
        "-i", video_in,
        "-vf", vf,      # filtergraph is *one* argument
        "-y",           # overwrite without asking
        video_out
    ]

    print("Running command:\n", shlex.join(cmd), "\n")
    try:
        subprocess.run(cmd, check=True)
        print(f"✅  Finished → {video_out}")
    except subprocess.CalledProcessError as e:
        print("❌  FFmpeg failed")
        print(e.stderr or e.stdout)

# Example run: burn the styled subtitles into the short test clip.
if __name__ == "__main__":
    burn_subtitles(
        video_in=r"D:\SOKM\Testing\01 Introduction SoKM 2024 - 2025 4k_480p_2m30s.mp4",
        srt_in=r"D:\SOKM\01 Introduction SoKM 2024 - 2025 4k_audio_english_fixed.srt",
        video_out="testing_burned.mp4",
        style="FontName=Lato,FontSize=22,PrimaryColour=&H00FFFFFF&,BorderStyle=3,OutlineColour=&H99000000,Shadow=1",
    )


Running command:
 ffmpeg -i 'D:\SOKM\Testing\01 Introduction SoKM 2024 - 2025 4k_480p_2m30s.mp4' -vf 'subtitles=file='"'"'D\:/SOKM/01 Introduction SoKM 2024 - 2025 4k_audio_english_fixed.srt'"'"':force_style='"'"'FontName=Lato,FontSize=22,PrimaryColour=&H00FFFFFF&,BorderStyle=3,OutlineColour=&H99000000,Shadow=1'"'"'' -y testing_burned.mp4 

❌  FFmpeg failed
None


In [7]:
# Print the value of `output_video_styled`.
# NOTE(review): `output_video_styled` is not assigned in any cell visible here, so
# this line would raise NameError on a fresh kernel - confirm where it is defined.
print("output_video_styled:", output_video_styled)

output_video_styled: D:\SOKM\Testing\01 Introduction SoKM 2024 - 2025 4k_480p_2m30s_burned.mp4
