ATO MIASA MI COPIER REVIEW FILE

In [1]:
import re

def parse_segments(content):
    """
    Split review-file text into its numbered segments.

    A segment begins at the text "Segment <number>" and runs up to (but not
    including) the next line that starts with "Segment <number>", or to the
    end of the content when there is no further segment.

    Returns a dict mapping each segment number (int) to the full text of that
    segment. When a number occurs more than once, the last occurrence wins.
    """
    segment_re = re.compile(
        r"(Segment\s+(\d+).*?)(?=^Segment\s+\d+|\Z)",
        re.DOTALL | re.MULTILINE,
    )
    return {
        int(found.group(2)): found.group(1)
        for found in segment_re.finditer(content)
    }

def get_field_text(segment_text, field_name):
    """
    Extract the text that follows a field marker (e.g., **Original:**,
    **Auto Translated:**, **Final Translation:**) on the marker's own line.

    Args:
        segment_text: Text of one segment.
        field_name: Marker name without the asterisks and colon,
            e.g. "Original".

    Returns:
        The rest of the marker's line with surrounding whitespace stripped,
        "" when the marker is present but nothing follows it on that line,
        or None when the marker does not occur in segment_text.
    """
    # Only horizontal whitespace may follow the marker. With `\s*` the newline
    # is consumed too, and an empty field would return the *next* line's text.
    pattern = r"\*\*" + re.escape(field_name) + r":\*\*[ \t]*(.*)"
    match = re.search(pattern, segment_text)
    if match is None:
        return None
    return match.group(1).strip()

def update_segment_fields(review_segment, updated_segment, fields):
    """
    Replace the specified fields in the review_segment with the ones extracted
    from the updated_segment.

    Args:
        review_segment: Text of the segment to modify.
        updated_segment: Text of the segment that holds the new field values.
        fields: List of field names such as
            ["Original", "Auto Translated", "Final Translation"].

    Returns:
        The review segment text with every listed field line rewritten.
        Fields that are missing from updated_segment are left untouched.
    """
    updated_seg = review_segment
    for field in fields:
        new_text = get_field_text(updated_segment, field)
        if new_text is None:
            continue
        # `[ \t]*` (not `\s*`) keeps the match on the marker's own line, so an
        # empty field does not swallow the line that follows it.
        pattern = r"(\*\*" + re.escape(field) + r":\*\*[ \t]*).*$"
        # A callable replacement inserts new_text literally. Concatenating it
        # onto r"\1" would treat it as a template: text starting with a digit
        # turns "\1" into another group number, and backslashes are parsed as
        # escapes.
        updated_seg = re.sub(
            pattern,
            lambda m, text=new_text: m.group(1) + text,
            updated_seg,
            flags=re.MULTILINE,
        )
    return updated_seg

def update_translation_review(review_content, updated_segments, fields):
    """
    For each segment in review_content, replace the specified fields with
    those from the updated segments.

    Args:
        review_content: Full text of the review file.
        updated_segments: Dict mapping segment number (int) to the segment
            text that holds the new field values.
        fields: Field names to copy, e.g. ["Original", "Final Translation"].

    Returns:
        The review text with matching segments rewritten. Text outside any
        segment, and segments whose number is not in updated_segments, are
        returned unchanged.
    """
    # Same segmentation pattern as the segment parser.
    pattern = r"(Segment\s+(\d+).*?)(?=^Segment\s+\d+|\Z)"

    def _rewrite(match):
        seg_no = int(match.group(2))
        if seg_no not in updated_segments:
            return match.group(1)
        return update_segment_fields(match.group(1), updated_segments[seg_no], fields)

    # Rewrite each segment where it was matched, in a single pass. Calling
    # str.replace on the whole document once per segment replaces every
    # occurrence of that text and rescans the document each time.
    return re.sub(pattern, _rewrite, review_content, flags=re.DOTALL | re.MULTILINE)

def main():
    """
    Copy the reviewed fields from the modified review file into the base
    review file and save the merged text under a new name.
    """
    # Input locations (adjust these paths as needed).
    modified_path = "to translate/translation_review_modifs ad 05.08 (2).txt"
    base_path = "to translate/translation_review.txt"

    with open(modified_path, encoding='utf-8') as source:
        modified_text = source.read()
    with open(base_path, encoding='utf-8') as source:
        base_text = source.read()

    # Segments of the modified file, keyed by segment number.
    replacements = parse_segments(modified_text)

    merged_text = update_translation_review(
        base_text,
        replacements,
        ["Original", "Auto Translated", "Final Translation"],
    )

    # The result goes to a separate file; the base review file is not touched.
    destination = "to translate/translation_review_updated.txt"
    with open(destination, "w", encoding='utf-8') as sink:
        sink.write(merged_text)

    print(f"Updated translation review file has been saved as {destination}")


if __name__ == "__main__":
    main()


Updated translation review file has been saved as to translate/translation_review_updated.txt


In [2]:
import re

def parse_segments(content):
    """
    Break the content into segments keyed by their number.

    Each segment starts at "Segment <number>" and ends right before the next
    line beginning with "Segment <number>" (or at the end of the content).
    Returns a dictionary of {segment number as int: full segment text}; a
    repeated number keeps the text of its last occurrence.
    """
    segment_pattern = r"(Segment\s+(\d+).*?)(?=^Segment\s+\d+|\Z)"
    by_number = {}
    for hit in re.finditer(segment_pattern, content, re.DOTALL | re.MULTILINE):
        text, number = hit.groups()
        by_number[int(number)] = text
    return by_number

def get_final_translation(segment_text):
    """
    Extract the final translation text from a segment.

    Looks for the first **Final Translation:** marker and returns the rest of
    that line, stripped. Returns "" when the marker is present but the line is
    otherwise empty, and None when the segment has no such marker.
    """
    # `[ \t]*` instead of `\s*`: the latter also consumes the newline, so an
    # empty field would return the text of the following line.
    match = re.search(r"\*\*Final Translation:\*\*[ \t]*(.*)", segment_text)
    if match is None:
        return None
    return match.group(1).strip()

def update_translation_review(review_content, updated_segments):
    """
    For each segment in the review file's content, replace the
    **Final Translation:** text with the one from the updated segments.

    Args:
        review_content: Full text of the review file.
        updated_segments: Dict mapping segment number (int) to the segment
            text that carries the new final translation.

    Returns:
        The review text with the final translations replaced. Segments whose
        number is not in updated_segments, or whose updated text has no
        **Final Translation:** marker, are returned unchanged.
    """
    segment_pattern = r"(Segment\s+(\d+).*?)(?=^Segment\s+\d+|\Z)"
    # `[ \t]*` keeps the match on the marker's own line; `\s*` would let an
    # empty field consume the next line.
    field_pattern = re.compile(r"(\*\*Final Translation:\*\*[ \t]*).*")

    def _rewrite_segment(match):
        segment_text = match.group(1)
        seg_no = int(match.group(2))
        if seg_no not in updated_segments:
            return segment_text
        updated_final = get_final_translation(updated_segments[seg_no])
        if updated_final is None:
            return segment_text
        # A callable replacement inserts the translation literally. Appending
        # it to r"\1" would make re parse it as a template, which fails for
        # text that starts with a digit or contains a backslash.
        return field_pattern.sub(lambda m: m.group(1) + updated_final, segment_text)

    # One pass over the document: each segment is rewritten where it was
    # matched instead of being searched for again with str.replace.
    return re.sub(segment_pattern, _rewrite_segment, review_content,
                  flags=re.DOTALL | re.MULTILINE)

def main():
    """
    Take the final translations from the updated file, apply them to the
    review file, and write the result to a new file.
    """
    # Input locations (make sure these paths are correct).
    source_of_updates = "to translate/translation_review_farany.txt"
    review_path = "to translate/translation_review.txt"

    with open(source_of_updates, encoding='utf-8') as handle:
        updates_text = handle.read()
    with open(review_path, encoding='utf-8') as handle:
        review_text = handle.read()

    # Segment number -> segment text of the updated file.
    segments_with_updates = parse_segments(updates_text)
    merged = update_translation_review(review_text, segments_with_updates)

    # Written next to the inputs under a different name; nothing is overwritten.
    result_path = "to translate/translation_review_updated.txt"
    with open(result_path, "w", encoding='utf-8') as handle:
        handle.write(merged)

    print(f"Updated translation review file has been saved as {result_path}")


if __name__ == "__main__":
    main()


Updated translation review file has been saved as to translate/translation_review_updated.txt


In [11]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
from aiohttp import ClientConnectorError
import random
import concurrent.futures
import pyttsx3

# Patch asyncio so that asyncio.run() can be used while an event loop is
# already running (the captured traceback shows this code executing inside an
# IPython kernel).
nest_asyncio.apply()

# ----- Configuration -----
# Fail fast at import time when the ffmpeg executable is not on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Video to process; every output name below is derived from its base name.
input_video = "to translate/4.2.2_Flux de navigation_Avr_08_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
# A timestamped directory name gives each run its own output folder.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Passed to WhisperModel by transcribe().
model_size = "small"
# NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
update_existing = True

# Choose which TTS engine to use by default:
# If USE_PYTTSX3 is True then offline pyttsx3 will be used,
# otherwise robust Edge TTS (cloud-based) is used.
USE_PYTTSX3 = True

# Files and paths
# The output directory is created as a side effect of running this cell.
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name above; kept as a separate name for the paths below.
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """
    Run ffmpeg on `input_video` and write its audio to `extracted_audio`
    (requested with ac=1 and ar=16000), overwriting an existing file.

    Returns the path of the extracted audio. When ffmpeg fails, its captured
    stdout and stderr are printed and the ffmpeg.Error is re-raised.
    """
    try:
        source = ffmpeg.input(input_video)
        job = source.output(extracted_audio, ac=1, ar=16000).overwrite_output()
        job.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        print("STDOUT:", err.stdout.decode("utf8"))
        print("STDERR:", err.stderr.decode("utf8"))
        raise
    return extracted_audio

def transcribe(audio_path):
    """
    Transcribe an audio file with a faster-whisper model of size `model_size`
    (CPU, int8 compute type, beam size 5).

    Returns a tuple (language, segments): the language reported by the model
    and a list of dicts with "start", "end" and stripped "text" for every
    transcribed segment.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    chunks, info = whisper.transcribe(audio_path, beam_size=5)
    detected = info.language
    print(f"Detected language: {detected}")
    transcript = [
        {"start": chunk.start, "end": chunk.end, "text": chunk.text.strip()}
        for chunk in chunks
    ]
    return detected, transcript

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """
    Convert a time offset in seconds to a pysrt.SubRipTime.

    The offset is rounded to the nearest millisecond. Negative offsets are
    clamped to zero.
    """
    # Work in whole milliseconds. Splitting the float with
    # int((seconds - int(seconds)) * 1000) truncates binary rounding error,
    # e.g. 1.001 s came out as 1 s 0 ms instead of 1 s 1 ms.
    total_ms = max(0, round(seconds * 1000))
    whole_seconds, milliseconds = divmod(total_ms, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=secs, milliseconds=milliseconds)

def generate_subtitle_file(segments, output_path):
    """
    Write transcript segments to an SRT file.

    `segments` is an iterable of dicts with "start" and "end" (seconds) and
    "text". Items are numbered from 1 in input order. The file is saved as
    UTF-8 and `output_path` is returned.
    """
    srt = pysrt.SubRipFile()
    for number, seg in enumerate(segments, start=1):
        begin = time_to_subrip(seg["start"])
        finish = time_to_subrip(seg["end"])
        srt.append(pysrt.SubRipItem(index=number, start=begin, end=finish, text=seg["text"]))
    srt.save(output_path, encoding="utf-8")
    return output_path

# ============== Translation & Review Functions ==============
def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """
    Group subtitles into sentences, machine-translate each group, write a
    review file for manual editing, and wait for the user to confirm.

    Subtitles are appended to the current group until one ends with '.', '!'
    or '?' (optionally followed by whitespace); leftover subtitles form a last
    group. For every group the review file gets the original text, the
    automatic translation (also used as the initial final translation), a
    voice speed of '+0%' and a silence duration of 100.

    The function blocks on input() until the user types 'Y' (any case).

    Returns the list of groups, each a list of subtitle items.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r"[.!?]\s*$").search

    # Sentence grouping.
    groups = []
    pending = []
    for cue in pysrt.open(source_path):
        pending.append(cue)
        if ends_sentence(cue.text):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    header = (
        "Translation Review File\n",
        "You can update the following properties for each segment:\n",
        "  **Final Translation:** Your updated French text\n",
        "  **Voice Speed:** Rate modifier such as '+0%', '+10%', '-5%', etc. (default '+0%')\n",
        "  **Silence Duration:** Silence (in ms) to append (default 100 ms)\n",
        "---\n\n",
    )
    speed_default = "+0%"
    silence_default = "100"

    with open(review_file_path, "w", encoding="utf-8") as out:
        out.writelines(header)
        for number, group in enumerate(groups, start=1):
            # Subtitle times are in milliseconds (ordinal); the file shows seconds.
            start_s = group[0].start.ordinal / 1000
            end_s = group[-1].end.ordinal / 1000
            source_text = " ".join([cue.text for cue in group])
            machine_text = translator.translate(text=source_text)
            out.write(
                f"Segment {number} (start: {start_s:.2f}s, end: {end_s:.2f}s):\n"
                f"**Original:** {source_text}\n"
                f"**Auto Translated:** {machine_text}\n"
                f"**Final Translation:** {machine_text}\n"
                f"**Voice Speed:** {speed_default}\n"
                f"**Silence Duration:** {silence_default}\n"
                "---\n\n"
            )

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations and the additional properties as needed.")

    # Keep asking until the reviewer confirms.
    confirmed = False
    while not confirmed:
        answer = input("Type 'Y' when ready to continue using the updated review file: ")
        confirmed = answer.strip().lower() == "y"
    return groups

def parse_review_overrides(review_file_path):
    """
    Read the per-segment settings back from a (possibly hand-edited) review file.

    The file is cut into blocks at separator lines made only of dashes
    ("---"). In each block, lines that start with **Final Translation:**,
    **Voice Speed:** or **Silence Duration:** are read. A block with no line
    starting with **Final Translation:** is ignored.

    Args:
        review_file_path: Path of the UTF-8 review file.

    Returns:
        A list with one dict per accepted block, in file order, with keys
        "final_translation" (str), "voice_speed" (str, default "+0%") and
        "silence_duration" (float, in ms, default 100.0; also used when the
        value is not a number).
    """
    with open(review_file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Split only on lines that consist of dashes. A plain content.split("---")
    # also cuts through any translation that happens to contain "---", which
    # truncates the text and shifts every later segment.
    raw_blocks = re.split(r"^-{3,}[ \t]*$", content, flags=re.MULTILINE)

    segments_overrides = []
    for raw in raw_blocks:
        blk = raw.strip()
        if not blk:
            continue
        final_translation = None
        voice_speed = "+0%"
        silence_duration = 100.0  # in ms default
        for line in blk.splitlines():
            if line.startswith("**Final Translation:**"):
                final_translation = line.split("**Final Translation:**", 1)[1].strip()
            elif line.startswith("**Voice Speed:**"):
                voice_speed = line.split("**Voice Speed:**", 1)[1].strip()
            elif line.startswith("**Silence Duration:**"):
                try:
                    silence_duration = float(line.split("**Silence Duration:**", 1)[1].strip())
                except ValueError:
                    # Unparseable value: fall back to the default.
                    silence_duration = 100.0
        if final_translation is not None:
            segments_overrides.append({
                "final_translation": final_translation,
                "voice_speed": voice_speed,
                "silence_duration": silence_duration
            })
    return segments_overrides

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """
    Fit `audio` to `target_duration` (seconds) by padding or trimming its end.

    A clip more than 0.1 s too short gets trailing silence; a clip more than
    0.1 s too long loses the excess from its end. Within the 0.1 s tolerance
    the clip is returned as is.
    """
    gap = target_duration - audio.duration_seconds
    if gap > 0.1:
        # Too short: append the missing time as silence (milliseconds).
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -0.1:
        # Too long: drop the excess milliseconds from the tail.
        excess_ms = abs(gap) * 1000
        return audio[:-int(excess_ms)]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """
    Split text into phrases at sentence-ending words.

    A phrase ends at a whitespace-separated word that finishes with '.', '!'
    or '?', provided it is the last word or the next word starts with an
    uppercase letter. Words left over at the end form a final phrase. Words
    inside a phrase are joined with single spaces.
    """
    tokens = text.split()
    final_index = len(tokens) - 1
    sentence_end = re.compile(r"[.!?]$")

    phrases, buffer = [], []
    for position, token in enumerate(tokens):
        buffer.append(token)
        if not sentence_end.search(token):
            continue
        if position == final_index or tokens[position + 1][0].isupper():
            phrases.append(" ".join(buffer))
            buffer = []
    if buffer:
        phrases.append(" ".join(buffer))
    return phrases

def calculate_phrase_weights(original_text, translated_phrases):
    """
    Return each translated phrase's share of the total word count.

    Args:
        original_text: Not used by the computation; kept so existing callers
            keep working.
        translated_phrases: List of phrase strings.

    Returns:
        A list of floats, one per phrase, that sum to 1 (up to float error).
        When no phrase contains any word, every phrase gets an equal share.
        An empty phrase list yields an empty list.
    """
    # No phrases (e.g. a blank final translation): nothing to weight. Without
    # this guard the equal-share branch below divides by zero.
    if not translated_phrases:
        return []
    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)
    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)
    return [count / total_words for count in word_counts]

# ============== TTS Functions: Robust Edge TTS and Offline pyttsx3 Fallback ==============
async def robust_synthesize_phrase(phrase: str, output_path: str, connector: aiohttp.TCPConnector, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", max_retries: int = 5):
    """
    Synthesize speech using Edge TTS with robust retry logic.

    Each failed attempt is followed by an exponential backoff with jitter
    (2**attempt seconds plus up to one random second) before the next try.

    Args:
        phrase: Text to speak.
        output_path: File the synthesized audio is saved to.
        connector: aiohttp connector handed to edge_tts.Communicate.
        voice: Edge TTS voice name.
        rate: Edge TTS rate modifier such as "+0%".
        max_retries: Maximum number of attempts.

    Raises:
        Exception: when every attempt failed; the last error is chained as
            the cause.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            # No aiohttp.ClientSession is opened around this call: the session
            # was never used for the request, and closing a session created
            # with a shared connector also closes that connector.
            # NOTE(review): confirm how edge_tts itself treats the connector
            # it is given.
            communicate = edge_tts.Communicate(
                text=phrase,
                voice=voice,
                rate=rate,
                connector=connector
            )
            await communicate.save(output_path)
            return
        except Exception as e:
            last_error = e
            if isinstance(e, (ClientConnectorError, ConnectionResetError)):
                label = "Connection error"
            else:
                label = "Error"
            if attempt + 1 >= max_retries:
                # Last attempt: report it, but do not wait for a retry that
                # will never happen.
                print(f"[Error] {label} on attempt {attempt+1}/{max_retries} for phrase: '{phrase}': {e}. No retries left.")
                break
            wait_time = 2 ** attempt + random.uniform(0, 1)
            print(f"[Error] {label} on attempt {attempt+1}/{max_retries} for phrase: '{phrase}': {e}. Retrying in {wait_time:.2f} seconds.")
            await asyncio.sleep(wait_time)
    raise Exception(f"Failed to synthesize phrase after {max_retries} attempts: {phrase}") from last_error

def synthesize_phrase_pyttsx3(phrase: str, output_path: str, voice: str = None, rate: str = "+0%"):
    """
    Synthesize speech using offline pyttsx3 and save it to output_path.

    Args:
        phrase: Text to speak.
        output_path: File the engine writes the audio to.
        voice: pyttsx3 voice id; the engine default is kept when None.
        rate: Percent modifier in the same format as the review file
            ("+0%", "+10%", "-5%"). A value that cannot be parsed is
            treated as "+0%".
    """
    engine = pyttsx3.init()
    if voice is not None:
        engine.setProperty("voice", voice)
    base_rate = engine.getProperty("rate")
    try:
        modifier = int(rate.strip(" %+"))
    except (AttributeError, TypeError, ValueError):
        modifier = 0
    # The modifier is a percentage while the engine rate is in words per
    # minute, so scale the base rate; adding the two mixes units ("+10%"
    # would only add 10 wpm). Never go below 1 wpm.
    new_rate = max(1, int(round(base_rate * (1 + modifier / 100))))
    engine.setProperty("rate", new_rate)
    # Queue the utterance for file output, then process the queue.
    engine.save_to_file(phrase, output_path)
    engine.runAndWait()

async def synthesize_phrase(phrase: str, output_path: str, connector: aiohttp.TCPConnector, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """
    Dispatch one phrase to the configured TTS engine.

    With USE_PYTTSX3 false, the phrase goes to the Edge TTS path with retries.
    Otherwise the blocking pyttsx3 synthesis runs in a thread pool created for
    this call, and the coroutine waits for it to finish.
    """
    if not USE_PYTTSX3:
        await robust_synthesize_phrase(phrase, output_path, connector, voice, rate)
        return

    event_loop = asyncio.get_event_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        offline_job = event_loop.run_in_executor(
            executor, synthesize_phrase_pyttsx3, phrase, output_path, voice, rate
        )
        await offline_job

# For backward compatibility:
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, connector: aiohttp.TCPConnector, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Older entry-point name; forwards every argument to synthesize_phrase."""
    await synthesize_phrase(
        phrase=phrase,
        output_path=output_path,
        connector=connector,
        voice=voice,
        rate=rate,
    )

def change_playback_speed(sound, speed=1.0):
    """
    Return `sound` re-timed by `speed`.

    The raw samples are re-labelled with a frame rate of
    int(sound.frame_rate * speed), and the result is then converted back to
    the original frame rate.
    """
    original_rate = sound.frame_rate
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )
    return retimed.set_frame_rate(original_rate)

# ============== Persistent Connector Creation ==============
def create_persistent_connector():
    """Return an aiohttp TCPConnector using the default SSL context and limit=10."""
    return aiohttp.TCPConnector(ssl=ssl.create_default_context(), limit=10)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path, persistent_connector):
    """
    Build the translated audio track for the sentence groups of a subtitle file.

    Steps:
      1. Write the review file and wait for the user to confirm it, then read
         the per-segment overrides back from it.
      2. For every sentence group, synthesize each phrase of the final
         translation, fit it to its share of the group's duration, and join
         the phrases with the configured silence.
      3. Place each group at its original start time in the combined track.
      4. Write the debug log and export the combined track as WAV.

    Args:
        subtitle_source_path: Source SRT file.
        output_audio_path: Destination of the combined WAV.
        debug_log_path: Destination of the per-segment debug log.
        review_file_path: Review file to create and read back.
        persistent_connector: Connector passed on to the synthesis call.

    Returns:
        output_audio_path.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []
    offset_threshold = 0.05  # seconds of drift tolerated before re-timing a group

    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start
        original_text = " ".join([sub.text for sub in group])
        # Overrides are matched to groups by position; a group without an
        # override falls back to the original text and the default settings.
        final_translation = overrides[idx]["final_translation"] if idx < len(overrides) else original_text
        voice_speed_override = overrides[idx]["voice_speed"] if idx < len(overrides) else "+0%"
        silence_duration_override = overrides[idx]["silence_duration"] if idx < len(overrides) else 100.0

        french_phrases = split_french_phrases(final_translation)
        weights = calculate_phrase_weights(original_text, french_phrases)
        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            # Each phrase gets a share of the group duration proportional to its weight.
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")
            try:
                await synthesize_phrase_edge_hybrid(
                    phrase, temp_path, connector=persistent_connector, voice="fr-FR-DeniseNeural", rate=voice_speed_override
                )
            except Exception as e:
                print(f"[Warning] Synthesis failed for phrase '{phrase}': {e}. Skipping this phrase.")
                continue
            # Now try to load the file; if decoding fails, fallback to pyttsx3
            try:
                audio = AudioSegment.from_mp3(temp_path)
            except Exception as e:
                print(f"[Warning] Decoding failed for file {temp_path}: {e}. Falling back to offline pyttsx3.")
                fallback_path = temp_path.replace(".mp3", ".wav")
                try:
                    synthesize_phrase_pyttsx3(phrase, fallback_path, voice="fr-FR-DeniseNeural", rate=voice_speed_override)
                    audio = AudioSegment.from_file(fallback_path, format="wav")
                    os.remove(fallback_path)
                except Exception as ex:
                    print(f"[Warning] Offline fallback failed for phrase '{phrase}': {ex}. Skipping this phrase.")
                    continue
            if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44 and audio:
                try:
                    audio = adjust_audio_duration(audio, phrase_duration)
                    phrase_audios.append(audio)
                except Exception as e:
                    print(f"[Warning] Ignoring corrupted audio file: {temp_path}. Error: {e}")
            else:
                print(f"[Warning] Missing or invalid file: {temp_path}")
            if os.path.exists(temp_path):
                os.remove(temp_path)

        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=silence_duration_override)
        # Never let a group run past its own time slot.
        group_audio = group_audio[:int(target_duration * 1000)]
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration
        if generated_duration <= 0:
            # Nothing was synthesized (blank translation or every phrase
            # failed). Dividing by the zero duration below would abort the
            # whole run, so the segment is left silent instead.
            if target_duration > 0:
                print(f"[Warning] Segment {group[0].index}: no audio was generated; leaving the segment silent.")
        elif abs(time_diff) > offset_threshold:
            # NOTE(review): group_audio was just truncated to the target, so
            # generated_duration <= target_duration and the factor is >= 1
            # here — confirm that this is the intended correction.
            speed_factor = target_duration / generated_duration
            print(f"Segment {group[0].index} : adjusting speed, factor={speed_factor:.3f}")
            group_audio = change_playback_speed(group_audio, speed_factor)
        # Pad the combined track so the group starts at its original position.
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence
        combined_audio += group_audio
        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed:** {voice_speed_override}\n"
            f"**Silence Duration:** {silence_duration_override} ms\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration before adjustment:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_path, "w", encoding="utf-8") as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)
    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")
    return output_audio_path

# ============== Merge Audio and Video Function ==============
def merge_audio_video():
    """
    Replace the audio of `input_video` with `translated_audio` and write the
    result to `output_video` (libx264 video, AAC audio).

    When the translated audio is shorter than the video, a copy padded with
    trailing silence is written to temp_full_audio.wav in the output directory
    and used instead. Both clips are closed when the function exits, whether
    or not writing succeeded.
    """
    video = VideoFileClip(input_video)
    try:
        audio = AudioFileClip(translated_audio)
        try:
            if audio.duration < video.duration:
                extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
                audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
                audio_seg = AudioSegment.from_file(translated_audio, format="wav")
                full_audio = audio_seg + extra_silence
                full_audio.export(audio_path_temp, format="wav")
                # Release the reader of the short clip before replacing it.
                audio.close()
                audio = AudioFileClip(audio_path_temp)
            dubbed = video.set_audio(audio)
            dubbed.write_videofile(
                output_video,
                codec="libx264",
                audio_codec="aac",
                temp_audiofile="temp-audio.m4a",
                remove_temp=True,
                threads=4
            )
        finally:
            audio.close()
    finally:
        # Without the close calls the clips' readers and file handles stay
        # open after the merge.
        video.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """
    Run the whole pipeline: extract audio, transcribe it, write the English
    subtitles, generate the reviewed French audio, and merge it into the video.

    The connector created at the start is closed when the pipeline ends,
    including when a step raises.
    """
    persistent_connector = create_persistent_connector()
    try:
        print("Extracting audio...")
        wav_path = extract_audio()

        print("Transcribing audio...")
        _detected_language, transcript = transcribe(wav_path)

        print("Generating English subtitles...")
        generate_subtitle_file(transcript, subtitle_file_en)

        print("Generating French audio with synchronization and manual overrides...")
        await async_generate_translated_audio_with_sync_using_review(
            subtitle_file_en,
            translated_audio,
            debug_log_file,
            review_file,
            persistent_connector,
        )

        print("Merging audio and video...")
        merge_audio_video()
        print(f"Process completed! Output video: {output_video}")
    finally:
        await persistent_connector.close()


if __name__ == "__main__":
    asyncio.run(async_main())


✅ ffmpeg found at: C:\ffmpeg\bin\ffmpeg.EXE
Extracting audio...
Transcribing audio...


Task exception was never retrieved
future: <Task finished name='Task-1921' coro=<async_main() done, defined at C:\Users\061181CA8\AppData\Local\Temp\ipykernel_16532\2572477427.py:401> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "c:\AI PROJECTS\video_transcript\myenv\Lib\site-packages\IPython\core\interactiveshell.py", line 3549, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\061181CA8\AppData\Local\Temp\ipykernel_16532\2572477427.py", line 419, in <module>
    asyncio.run(async_main())
  File "c:\AI PROJECTS\video_transcript\myenv\Lib\site-packages\nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\AI PROJECTS\video_transcript\myenv\Lib\site-packages\nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "c:\AI PROJECTS\video_transcript\myenv\Lib\site-packages\nest_asyncio.py", line 133, in _run_once
    handle._run()
  File 

Detected language: en


KeyboardInterrupt: 

In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import random
import concurrent.futures
import pyttsx3

# Patch asyncio so that asyncio.run() can be used while an event loop is
# already running (as in a notebook kernel).
nest_asyncio.apply()

# ----- Configuration -----
# Fail fast at import time when the ffmpeg executable is not on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Video to process; every output name below is derived from its base name.
input_video = "to translate/4.2.4_Configuration de la solution_Avr_10_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
# A timestamped directory name gives each run its own output folder.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Passed to WhisperModel by transcribe().
model_size = "small"
# NOTE(review): not referenced in the visible part of this cell — confirm whether it is still needed.
update_existing = True

# For this version, we use only offline pyttsx3.
# NOTE(review): the code that reads this flag is not in the visible part of this cell.
USE_PYTTSX3 = True

# Files and paths
# The output directory is created as a side effect of running this cell.
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name above; kept as a separate name for the paths below.
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """
    Write the audio of `input_video` to `extracted_audio` with ffmpeg
    (options ac=1, ar=16000), replacing any existing file.

    Returns the output path. On ffmpeg failure the captured stdout and stderr
    are printed and the error is re-raised.
    """
    try:
        stream = ffmpeg.input(input_video)
        stream = stream.output(extracted_audio, ac=1, ar=16000)
        stream = stream.overwrite_output()
        stream.run(capture_stdout=True, capture_stderr=True)
        return extracted_audio
    except ffmpeg.Error as failure:
        print("STDOUT:", failure.stdout.decode("utf8"))
        print("STDERR:", failure.stderr.decode("utf8"))
        raise

def transcribe(audio_path):
    """
    Run faster-whisper (model `model_size`, CPU, int8, beam size 5) on an
    audio file.

    Returns (language, transcript_segments), where each transcript segment is
    a dict holding "start", "end" and the stripped "text".
    """
    engine = WhisperModel(model_size, device="cpu", compute_type="int8")
    pieces, details = engine.transcribe(audio_path, beam_size=5)
    language = details.language
    print(f"Detected language: {language}")
    transcript_segments = [
        dict(start=piece.start, end=piece.end, text=piece.text.strip())
        for piece in pieces
    ]
    return language, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """
    Convert a time offset in seconds to a pysrt.SubRipTime.

    The offset is rounded to the nearest millisecond; negative offsets are
    clamped to zero.
    """
    # Convert to an integer millisecond count first. Taking
    # int((seconds - int(seconds)) * 1000) truncates binary float error, so a
    # value such as 1.001 s lost its last millisecond.
    remaining_ms = max(0, round(seconds * 1000))
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    secs, milliseconds = divmod(remaining_ms, 1000)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=secs, milliseconds=milliseconds)

def generate_subtitle_file(segments, output_path):
    """
    Save transcript segments as a UTF-8 SRT file and return `output_path`.

    Every segment dict supplies "start", "end" (seconds) and "text"; items
    are indexed from 1 in the order given.
    """
    subtitle_file = pysrt.SubRipFile()
    position = 0
    for entry in segments:
        position += 1
        item_fields = {
            "index": position,
            "start": time_to_subrip(entry["start"]),
            "end": time_to_subrip(entry["end"]),
            "text": entry["text"],
        }
        subtitle_file.append(pysrt.SubRipItem(**item_fields))
    subtitle_file.save(output_path, encoding="utf-8")
    return output_path

# ============== Translation & Review Functions ==============
def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """
    Group the subtitles of ``source_path`` into sentences, machine-translate
    each group and write a review file for manual editing.

    A group is closed whenever a subtitle's text ends with '.', '!' or '?'
    (optionally followed by whitespace); trailing subtitles form a last group.
    After writing the file the function blocks on ``input`` until the user
    answers 'y' (case-insensitive).

    Returns the list of groups (each a list of subtitle items).
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r"[.!?]\s*$")

    groups = []
    pending = []
    for cue in pysrt.open(source_path):
        pending.append(cue)
        if ends_sentence.search(cue.text):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    header = (
        "Translation Review File\n"
        "You can update the following properties for each segment:\n"
        "  **Final Translation:** Your updated French text\n"
        "  **Voice Speed:** Rate modifier such as '+0%', '+10%', '-5%', etc. (default '+0%')\n"
        "  **Silence Duration:** Silence (in ms) to append (default 100 ms)\n"
        "---\n\n"
    )
    with open(review_file_path, "w", encoding="utf-8") as out:
        out.write(header)
        for number, group in enumerate(groups, start=1):
            first_start = group[0].start.ordinal / 1000
            last_end = group[-1].end.ordinal / 1000
            source_text = " ".join(cue.text for cue in group)
            machine_text = translator.translate(text=source_text)
            # The machine translation pre-fills the editable field.
            out.write(
                f"Segment {number} (start: {first_start:.2f}s, end: {last_end:.2f}s):\n"
                f"**Original:** {source_text}\n"
                f"**Auto Translated:** {machine_text}\n"
                f"**Final Translation:** {machine_text}\n"
                "**Voice Speed:** +0%\n"
                "**Silence Duration:** 100\n"
                "---\n\n"
            )

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations and the additional properties as needed.")
    confirmed = False
    while not confirmed:
        answer = input("Type 'Y' when ready to continue using the updated review file: ")
        confirmed = answer.strip().lower() == "y"
    return groups

def parse_review_overrides(review_file_path):
    """
    Read the review file and collect the per-segment overrides.

    The file is split on "---"; in every non-empty block the lines starting
    with the **Final Translation:**, **Voice Speed:** and
    **Silence Duration:** markers are read. Voice speed defaults to "+0%",
    silence duration to 100.0 (also used when the value is not a number).
    Blocks without a final-translation line are ignored.

    Returns a list of dicts with the keys "final_translation", "voice_speed"
    and "silence_duration", in file order.
    """
    translation_key = "**Final Translation:**"
    speed_key = "**Voice Speed:**"
    silence_key = "**Silence Duration:**"

    with open(review_file_path, "r", encoding="utf-8") as handle:
        raw_blocks = handle.read().split("---")

    parsed = []
    for raw in raw_blocks:
        block = raw.strip()
        if not block:
            continue
        entry = {
            "final_translation": None,
            "voice_speed": "+0%",
            "silence_duration": 100.0,
        }
        for line in block.splitlines():
            if line.startswith(translation_key):
                entry["final_translation"] = line[len(translation_key):].strip()
            elif line.startswith(speed_key):
                entry["voice_speed"] = line[len(speed_key):].strip()
            elif line.startswith(silence_key):
                try:
                    entry["silence_duration"] = float(line[len(silence_key):].strip())
                except ValueError:
                    entry["silence_duration"] = 100.0
        if entry["final_translation"] is not None:
            parsed.append(entry)
    return parsed

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """
    Pad or trim ``audio`` towards ``target_duration`` (seconds).

    If the clip is more than 0.1 s too short, silence is appended; if it is
    more than 0.1 s too long, the excess is cut from the end; otherwise the
    clip is returned unchanged.
    """
    tolerance = 0.1
    gap = target_duration - audio.duration_seconds
    if gap > tolerance:
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -tolerance:
        excess_ms = int(-gap * 1000)
        return audio[:-excess_ms]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """
    Split ``text`` into phrases at sentence boundaries.

    A boundary is a whitespace-separated word ending in '.', '!' or '?' that
    is either the last word or is followed by a word starting with an
    uppercase character. Words are re-joined with single spaces.
    """
    tokens = text.split()
    final_pos = len(tokens) - 1
    terminal = re.compile(r"[.!?]$")

    phrases = []
    start = 0
    for pos, token in enumerate(tokens):
        if not terminal.search(token):
            continue
        if pos == final_pos or tokens[pos + 1][0].isupper():
            phrases.append(" ".join(tokens[start:pos + 1]))
            start = pos + 1
    if start < len(tokens):
        # Trailing words that never reached a sentence boundary.
        phrases.append(" ".join(tokens[start:]))
    return phrases

def calculate_phrase_weights(original_text, translated_phrases):
    """
    Return, for each translated phrase, its share of the total word count.

    ``original_text`` is accepted for interface compatibility but not used.
    An empty phrase list yields an empty list (previously this raised
    ZeroDivisionError). If the phrases contain no words at all, the weight is
    distributed evenly.
    """
    if not translated_phrases:
        return []
    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)
    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)
    return [count / total_words for count in word_counts]

# ============== TTS Functions: Offline pyttsx3 Only ==============
def synthesize_phrase_pyttsx3(phrase: str, output_path: str, voice: str = None, rate: str = "+0%"):
    """
    Synthesize speech using offline pyttsx3.
    Saves the output as a WAV file.

    ``rate`` is a percentage modifier such as "+10%" or "-5%", the same format
    the review file documents. It is applied relative to the engine's current
    rate; the previous code added the number as absolute words per minute, so
    "+10%" changed the speed by far less than 10 %. An unparsable modifier is
    treated as "+0%".
    """
    engine = pyttsx3.init()
    # Optionally set the voice if available on your system.
    if voice is not None:
        engine.setProperty("voice", voice)
    base_rate = engine.getProperty("rate")
    try:
        percent = int(rate.strip(" %+"))
    except (AttributeError, ValueError):
        percent = 0
    # Keep the rate a positive integer even for extreme negative modifiers.
    new_rate = max(1, int(round(base_rate * (100 + percent) / 100)))
    engine.setProperty("rate", new_rate)
    engine.save_to_file(phrase, output_path)
    engine.runAndWait()

async def synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """
    Await the blocking ``synthesize_phrase_pyttsx3`` call from async code.

    The call is submitted to a ThreadPoolExecutor created for this single
    invocation, which is shut down when the call has finished.
    """
    event_loop = asyncio.get_event_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = event_loop.run_in_executor(
            executor, synthesize_phrase_pyttsx3, phrase, output_path, voice, rate
        )
        await pending

# Legacy entry point kept so existing call sites keep working.
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Forward all arguments unchanged to ``synthesize_phrase``."""
    await synthesize_phrase(
        phrase,
        output_path,
        voice,
        rate,
    )

def change_playback_speed(sound, speed=1.0):
    """
    Return ``sound`` re-timed by ``speed``.

    The raw samples are re-labelled with a frame rate of
    ``int(frame_rate * speed)`` and the result is then converted back to the
    original frame rate.
    """
    original_rate = sound.frame_rate
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )
    return retimed.set_frame_rate(original_rate)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path):
    """
    Build the translated audio track from the reviewed translations.

    Steps: create the review file (which waits for the user), parse the
    overrides, synthesize every phrase of every subtitle group, fit each
    group into its original time slot, place it at its original start time
    in one combined track, then write a debug log and export the track as WAV.

    Changes compared with the previous revision:
      * the speed factor passed to ``change_playback_speed`` is
        generated/target; the former target/generated made audio that was
        already too short even shorter;
      * a group for which no audio could be produced is filled with silence
        instead of raising ZeroDivisionError;
      * an empty translation no longer reaches ``calculate_phrase_weights``;
      * the unused second ``pysrt.open`` of the subtitle file is gone.

    Returns ``output_audio_path``.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)
    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []
    offset_threshold = 0.05  # seconds of mismatch tolerated before correcting
    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start
        original_text = " ".join([sub.text for sub in group])
        if idx < len(overrides):
            override = overrides[idx]
            final_translation = override["final_translation"]
            voice_speed_override = override["voice_speed"]
            silence_duration_override = override["silence_duration"]
        else:
            # No review entry for this group: fall back to the source text.
            final_translation = original_text
            voice_speed_override = "+0%"
            silence_duration_override = 100.0
        french_phrases = split_french_phrases(final_translation)
        weights = calculate_phrase_weights(original_text, french_phrases) if french_phrases else []
        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            # Each phrase gets a share of the slot proportional to its weight.
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.wav")
            try:
                await synthesize_phrase_edge_hybrid(phrase, temp_path, voice="fr-FR-DeniseNeural", rate=voice_speed_override)
            except Exception as e:
                print(f"[Warning] Synthesis failed for phrase '{phrase}': {e}. Skipping this phrase.")
                continue
            try:
                audio = AudioSegment.from_file(temp_path, format="wav")
            except Exception as e:
                print(f"[Warning] Unable to load audio from {temp_path}: {e}. Skipping this phrase.")
                if os.path.exists(temp_path):
                    os.remove(temp_path)
                continue
            if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44 and audio:
                try:
                    audio = adjust_audio_duration(audio, phrase_duration)
                    phrase_audios.append(audio)
                except Exception as e:
                    print(f"[Warning] Ignoring corrupted audio file: {temp_path}. Error: {e}")
            else:
                print(f"[Warning] Missing or invalid file: {temp_path}")
            if os.path.exists(temp_path):
                os.remove(temp_path)
        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=silence_duration_override)
        # Never let a group run past the end of its time slot.
        group_audio = group_audio[:int(target_duration * 1000)]
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration
        if abs(time_diff) > offset_threshold:
            if generated_duration > 0 and target_duration > 0:
                # speed < 1 stretches the audio, speed > 1 compresses it.
                speed_factor = generated_duration / target_duration
                print(f"Segment {group[0].index} : adjusting speed, factor={speed_factor:.3f}")
                group_audio = change_playback_speed(group_audio, speed_factor)
            else:
                print(f"[Warning] Segment {group[0].index}: no audio generated, filling the slot with silence.")
                group_audio = AudioSegment.silent(duration=max(0, int(target_duration * 1000)))
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence
        combined_audio += group_audio
        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed:** {voice_speed_override}\n"
            f"**Silence Duration:** {silence_duration_override} ms\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration before adjustment:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)
    with open(debug_log_path, "w", encoding="utf-8") as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)
    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")
    return output_audio_path

# ============== Merge Audio and Video Function ==============
def merge_audio_video():
    """
    Replace the audio of ``input_video`` with ``translated_audio`` and write
    the result to ``output_video`` (libx264 video, AAC audio).

    If the translated audio is shorter than the video, a silence-padded copy
    is written to ``temp_full_audio.wav`` in ``output_dir`` and used instead.
    Every moviepy clip opened here is closed again, including on failure, so
    the underlying reader processes and file handles are released.
    """
    opened_clips = []
    try:
        video = VideoFileClip(input_video)
        opened_clips.append(video)
        audio = AudioFileClip(translated_audio)
        opened_clips.append(audio)
        if audio.duration < video.duration:
            extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
            audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
            audio_seg = AudioSegment.from_file(translated_audio, format="wav")
            full_audio = audio_seg + extra_silence
            full_audio.export(audio_path_temp, format="wav")
            audio = AudioFileClip(audio_path_temp)
            opened_clips.append(audio)
        dubbed = video.set_audio(audio)
        dubbed.write_videofile(
            output_video,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile="temp-audio.m4a",
            remove_temp=True,
            threads=4
        )
    finally:
        for clip in opened_clips:
            clip.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """
    Run the whole pipeline in order: extract audio, transcribe it, write the
    English subtitles, generate the reviewed French audio and merge it with
    the video. Progress is printed before every step.
    """
    print("Extracting audio...")
    extracted = extract_audio()

    print("Transcribing audio...")
    _detected_language, transcript = transcribe(extracted)

    print("Generating English subtitles...")
    generate_subtitle_file(transcript, subtitle_file_en)

    print("Generating French audio with synchronization and manual overrides...")
    await async_generate_translated_audio_with_sync_using_review(
        subtitle_file_en,
        translated_audio,
        debug_log_file,
        review_file,
    )

    print("Merging audio and video...")
    merge_audio_video()

    print(f"Process completed! Output video: {output_video}")

# Script entry point: run the asynchronous pipeline to completion.
if __name__ == "__main__":
    asyncio.run(async_main())


testa hanova voix -ATO NDRAY MIASA

to translate/4.2.2_Flux de navigation_Avr_08_Latest.mp4

In [1]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
import random

# Patch asyncio through nest_asyncio before any event loop is started below.
# NOTE(review): presumably needed because the notebook already runs a loop -
# confirm.
nest_asyncio.apply()

# ----- Configuration -----
# Fail fast when the ffmpeg executable cannot be located on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video; every run writes into its own timestamped output directory.
input_video = "to translate/4.2.4_Configuration de la solution_Avr_10_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Model size handed to WhisperModel in transcribe().
model_size = "small"
# NOTE(review): update_existing is not referenced in the visible part of this
# script - confirm whether it is still needed.
update_existing = True

# For this version we rely on cloud-based Edge TTS.
# NOTE(review): USE_EDGE_TTS is not read anywhere in the visible code.
USE_EDGE_TTS = True

# Files and paths
# The directory is created immediately, as a side effect of running this cell.
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name; used as the prefix of every generated file name.
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """
    Run ffmpeg on the module-level ``input_video`` and write its audio to
    ``extracted_audio`` (requested with ac=1 and ar=16000, overwriting any
    existing file).

    Returns the ``extracted_audio`` path. On ``ffmpeg.Error`` the captured
    stdout/stderr are printed and the error is re-raised.
    """
    job = ffmpeg.input(input_video)
    job = job.output(extracted_audio, ac=1, ar=16000)
    job = job.overwrite_output()
    try:
        job.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        # Surface ffmpeg's own diagnostics before propagating the failure.
        print("STDOUT:", err.stdout.decode("utf8"))
        print("STDERR:", err.stderr.decode("utf8"))
        raise
    return extracted_audio

def transcribe(audio_path):
    """
    Transcribe ``audio_path`` with a ``WhisperModel`` of size ``model_size``
    (CPU, int8, beam size 5).

    Returns a tuple ``(language, segments)`` where ``language`` is the value
    reported by the model and ``segments`` is a list of dicts with the keys
    "start", "end" and "text" (text stripped of surrounding whitespace).
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    detected = info.language
    print(f"Detected language: {detected}")
    transcript = [
        {"start": piece.start, "end": piece.end, "text": piece.text.strip()}
        for piece in raw_segments
    ]
    return detected, transcript

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """
    Convert a time offset in seconds to a ``pysrt.SubRipTime``.

    The value is first rounded to whole milliseconds and then decomposed with
    integer arithmetic. Deriving the milliseconds from the float remainder
    (``int((s - int(s)) * 1000)``) truncates values such as 1.14 to 139 ms
    because of binary floating-point representation.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    whole_seconds, milliseconds = divmod(remainder, 1000)
    return pysrt.SubRipTime(
        hours=hours,
        minutes=minutes,
        seconds=whole_seconds,
        milliseconds=milliseconds,
    )

def generate_subtitle_file(segments, output_path):
    """
    Write ``segments`` (dicts with "start", "end" and "text") to an SRT file.

    Items are numbered from 1 in input order and the file is saved as UTF-8.
    Returns ``output_path``.
    """
    srt = pysrt.SubRipFile()
    for number, entry in enumerate(segments, start=1):
        srt.append(
            pysrt.SubRipItem(
                index=number,
                start=time_to_subrip(entry["start"]),
                end=time_to_subrip(entry["end"]),
                text=entry["text"],
            )
        )
    srt.save(output_path, encoding="utf-8")
    return output_path

# ============== Translation & Review Functions ==============

def split_long_groups(groups, max_group_duration_secs):
    """
    Split every group whose duration exceeds ``max_group_duration_secs``.

    Each group is a list of subtitle items exposing ``start.ordinal`` and
    ``end.ordinal`` (milliseconds) and ``text``. While walking an over-long
    group, items are collected until the collected span reaches the limit;
    the collection is then cut after the last item whose text ends in
    '.', ',', '!' or '?'. If no such item has been collected, everything
    collected so far is emitted as one group.

    The position of the last safe break is tracked relative to the items
    currently collected. The previous code stored the index within the whole
    group and used it to slice the (shorter) collection, so every split after
    the first one was made at the wrong place.

    Groups at or below the limit are passed through unchanged; empty groups
    are skipped. Returns a new list of groups.
    """
    safe_break = re.compile(r"[.,!?]$")
    new_groups = []
    for group in groups:
        if not group:
            continue
        start_s = group[0].start.ordinal / 1000
        end_s = group[-1].end.ordinal / 1000

        # Already short enough: keep as is.
        if end_s - start_s <= max_group_duration_secs:
            new_groups.append(group)
            continue

        pending = []
        pending_start = start_s
        # Number of leading items of `pending` up to and including the last
        # item that ends in safe punctuation (0 = no safe break yet).
        safe_len = 0
        for item in group:
            pending.append(item)
            if safe_break.search(item.text.strip()):
                safe_len = len(pending)

            current_end = item.end.ordinal / 1000
            if (current_end - pending_start) < max_group_duration_secs:
                continue

            if safe_len:
                new_groups.append(pending[:safe_len])
                pending = pending[safe_len:]
                pending_start = pending[0].start.ordinal / 1000 if pending else current_end
            else:
                # No safe break available: cut right here.
                new_groups.append(pending)
                pending = []
                pending_start = current_end
            # Items left in `pending` come after the last safe break.
            safe_len = 0

        if pending:
            new_groups.append(pending)

    return new_groups


def parse_review_overrides(review_file_path):
    """
    Parse **Final Translation**, **Voice Speed**, **Pre-Silence** and
    **Post-Silence** from the review file.

    The file is split on the dashed separator line; every non-empty block
    that contains a **Final Translation:** line yields one dict with the keys
    "final_translation", "voice_speed", "pre_silence" and "post_silence".
    Defaults: voice speed "+0%", pre-silence 0.0 ms, post-silence 100.0 ms
    (the silence defaults also apply when the value is not a number).

    A block whose **Voice Speed:** line is missing now falls back to "+0%".
    It used to be dropped, which shifted all following overrides onto the
    wrong segments because callers match overrides to groups by position.
    The file is read inside a ``with`` block so the handle is always closed.

    Returns the list of override dicts in file order.
    """
    overrides = []
    with open(review_file_path, "r", encoding="utf-8") as review:
        text = review.read()
    blocks = [b.strip() for b in text.split("----------------------------------------------------------------") if b.strip()]

    for blk in blocks:
        ft = None
        vs = "+0%"
        pre_ms = 0.0
        post_ms = 100.0  # Default to 100ms if not specified

        for line in blk.splitlines():
            if line.startswith("**Final Translation:**"):
                ft = line.split("**Final Translation:**", 1)[1].strip()
            elif line.startswith("**Voice Speed:**"):
                vs = line.split("**Voice Speed:**", 1)[1].strip()
            elif line.startswith("**Pre-Silence:**"):
                try:
                    pre_ms = float(line.split("**Pre-Silence:**", 1)[1].strip())
                except ValueError:
                    pre_ms = 0.0
            elif line.startswith("**Post-Silence:**"):
                try:
                    post_ms = float(line.split("**Post-Silence:**", 1)[1].strip())
                except ValueError:
                    post_ms = 100.0

        # The file header has no line starting with the translation key, so
        # it never produces an entry.
        if ft is not None:
            overrides.append({
                "final_translation": ft,
                "voice_speed":      vs,
                "pre_silence":      pre_ms,
                "post_silence":     post_ms
            })

    print("Parsed review overrides:")
    for idx, o in enumerate(overrides, 1):
        print(f"  Segment {idx}: pre={o['pre_silence']}ms, post={o['post_silence']}ms, speed={o['voice_speed']}")
    return overrides

def generate_translation_review_file(
    source_path, review_file_path,
    from_lang="en", to_lang="fr",
    max_group_duration_secs: float = 25.0
):
    """
    Create the translation review file and return the subtitle groups.

    The subtitles of ``source_path`` are grouped into sentences (a group ends
    with a subtitle whose text ends in '.', '!' or '?'), then passed through
    ``split_long_groups`` with ``max_group_duration_secs`` and through
    ``enforce_punctuation_boundaries``. One block per resulting group is
    written, containing the original text, its machine translation (also
    used as the initial final translation) and the default voice-speed and
    silence values. The function then waits for one line of user input.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r"[.!?]\s*$")

    # Sentence-based grouping of consecutive subtitles.
    groups = []
    pending = []
    for cue in pysrt.open(source_path):
        pending.append(cue)
        if ends_sentence.search(cue.text):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    # Duration limit first, punctuation clean-up second.
    groups = split_long_groups(groups, max_group_duration_secs)
    groups = enforce_punctuation_boundaries(groups)

    separator = "----------------------------------------------------------------\n\n"
    with open(review_file_path, "w", encoding="utf-8") as out:
        out.write(
            "Translation Review File\n"
            "Please update the French text in the **Final Translation:** field below.\n"
            "DO NOT change the keys (**Final Translation:**, **Voice Speed:**, **Pre-Silence:**, **Post-Silence:**).\n"
        )
        out.write(separator)

        for number, group in enumerate(groups, start=1):
            first_start = group[0].start.ordinal / 1000
            last_end = group[-1].end.ordinal / 1000
            source_text = " ".join(cue.text for cue in group)
            machine_text = translator.translate(text=source_text)

            out.write(
                f"Segment {number} (start: {first_start:.2f}s, end: {last_end:.2f}s):\n"
                f"**Original:** {source_text}\n"
                f"**Auto Translated:** {machine_text}\n"
                f"**Final Translation:** {machine_text}\n"
                "**Voice Speed:** +0%\n"
                "**Pre-Silence:** 100\n"
                "**Post-Silence:** 100\n"
            )
            out.write(separator)

    print(f"✅ Review file created at: {review_file_path}  (split into {len(groups)} segments)")
    # Any entered line continues; the answer itself is not inspected.
    input("Type 'Y' when ready to continue: ")
    return groups


def enforce_punctuation_boundariesolf(groups):
    """
    Return groups whose last subtitle ends in '.', ',', '!' or '?'.

    A group that does not end that way is prepended to the following group
    (the corresponding slot of the input list is overwritten with the merged
    list) and is not emitted on its own. The final group is always emitted.
    """
    ends_safely = re.compile(r"[.,!?]$")
    kept = []
    for pos, chunk in enumerate(groups):
        needs_merge = not ends_safely.search(chunk[-1].text.strip())
        has_successor = pos + 1 < len(groups)
        if needs_merge and has_successor:
            # The loop picks up the merged list on its next iteration.
            groups[pos + 1] = chunk + groups[pos + 1]
        else:
            kept.append(chunk)
    return kept

def enforce_punctuation_boundaries(groups):
    """
    Make every group end in one of ``. ! ? , ; :`` and return ``groups``.

    Works in place: a group whose last subtitle lacks such punctuation
    absorbs the following group; if it is the last group, a "." is appended
    to the text of its last subtitle instead.
    """
    ends_ok = re.compile(r"[.!?,;:]$")
    pos = 0
    while pos < len(groups):
        tail = groups[pos][-1]
        if ends_ok.search(tail.text.strip()):
            pos += 1
            continue
        if pos + 1 < len(groups):
            # Stay on the same position: the merged group is checked again.
            groups[pos].extend(groups.pop(pos + 1))
        else:
            tail.text += "."
    return groups



# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """
    Pad or trim ``audio`` towards ``target_duration`` (seconds).

    If the clip is more than 0.1 s too short, silence is appended; if it is
    more than 0.1 s too long, the excess is cut from the end; otherwise the
    clip is returned unchanged.
    """
    tolerance = 0.1
    gap = target_duration - audio.duration_seconds
    if gap > tolerance:
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -tolerance:
        excess_ms = int(-gap * 1000)
        return audio[:-excess_ms]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """
    Split ``text`` into phrases at sentence boundaries.

    A boundary is whitespace that follows '.', '!' or '?' and precedes an
    uppercase character. Uppercase is checked with ``str.isupper`` so that
    accented capitals common in French (É, À, Ç, ...) start a new phrase;
    the previous ``[A-Z]`` lookahead only recognised ASCII capitals.

    Returns the stripped, non-empty phrases in order.
    """
    # With the capture group, re.split yields: text, separator, text, ...
    pieces = re.split(r"(?<=[.!?])(\s+)", text)
    phrases = []
    current = pieces[0]
    for separator, following in zip(pieces[1::2], pieces[2::2]):
        if following[:1].isupper():
            phrases.append(current)
            current = following
        else:
            # Not a sentence start: glue it back with its original spacing.
            current += separator + following
    phrases.append(current)
    return [phrase.strip() for phrase in phrases if phrase.strip()]

def calculate_phrase_weights(original_text, translated_phrases):
    """
    Return, for each translated phrase, its share of the total word count.

    ``original_text`` is accepted for interface compatibility but not used.
    An empty phrase list yields an empty list (previously this raised
    ZeroDivisionError). If the phrases contain no words at all, the weight is
    distributed evenly.
    """
    if not translated_phrases:
        return []
    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)
    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)
    return [count / total_words for count in word_counts]

# ============== TTS Functions: Edge TTS Only with Debug Logging ==============
async def robust_synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", max_retries: int = 5):
    """
    Synthesize ``phrase`` with Edge TTS and save it to ``output_path``,
    retrying on failure.

    Up to ``max_retries`` attempts are made. Between attempts the coroutine
    sleeps ``2 ** attempt`` seconds plus up to one second of random jitter.
    Debug messages are printed for every attempt.

    Changes compared with the previous revision:
      * no backoff sleep after the final failed attempt - the error is
        raised immediately instead of after a pointless wait;
      * the aiohttp session that was opened per attempt but never handed to
        Edge TTS has been removed;
      * the raised error is chained to the last underlying exception.

    Raises:
        RuntimeError: when every attempt failed (or ``max_retries`` is 0).
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            communicate = edge_tts.Communicate(
                text=phrase,
                voice=voice,
                rate=rate
            )
            print(f"[Debug] Attempt {attempt+1}: Synthesizing phrase: '{phrase}'")
            await communicate.save(output_path)
            print(f"[Debug] Phrase synthesized successfully to {output_path}")
            return
        except Exception as e:
            last_error = e
            print(f"[Error] Attempt {attempt+1}/{max_retries} failed for phrase: '{phrase}'. Exception: {e}")
            if attempt + 1 < max_retries:
                wait_time = 2 ** attempt + random.uniform(0, 1)
                print(f"[Debug] Retrying in {wait_time:.2f} seconds...")
                await asyncio.sleep(wait_time)
    raise RuntimeError(f"Failed to synthesize phrase after {max_retries} attempts: {phrase}") from last_error

async def synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Synthesize ``phrase`` by delegating to ``robust_synthesize_phrase``."""
    await robust_synthesize_phrase(
        phrase,
        output_path,
        voice,
        rate,
    )

# Legacy entry point kept so existing call sites keep working.
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Forward all arguments unchanged to ``synthesize_phrase``."""
    await synthesize_phrase(
        phrase,
        output_path,
        voice,
        rate,
    )

def change_playback_speed(sound, speed=1.0):
    """
    Return ``sound`` re-timed by ``speed``.

    The raw samples are re-labelled with a frame rate of
    ``int(frame_rate * speed)`` and the result is then converted back to the
    original frame rate.
    """
    original_rate = sound.frame_rate
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )
    return retimed.set_frame_rate(original_rate)

# ============== Updated Async Audio Generation Function ==============



async def async_generate_translated_audio_with_sync_using_review(
    subtitle_source_path, output_audio_path,
    debug_log_path, review_file_path
):
    """
    Build the translated audio track from the reviewed translations.

    Steps: create the review file (which waits for the user), parse the
    overrides, synthesize every phrase of every subtitle group as MP3, fit
    each group (pre-silence + phrases + post-silence) into its original time
    slot, place it at its original start time in one combined track, then
    write a debug log and export the track as WAV.

    Changes compared with the previous revision:
      * the speed factor passed to ``change_playback_speed`` is
        generated/target; the former target/generated made audio that was
        already too short even shorter;
      * a segment for which no audio at all was produced is filled with
        silence instead of raising ZeroDivisionError;
      * an empty translation no longer reaches ``calculate_phrase_weights``.

    Returns ``output_audio_path``.
    """
    # 1) Build/write review file & parse overrides
    groups    = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)

    # Pad overrides list so it matches the number of groups
    default_override = {
        "final_translation": None,   # falls back to the original text below
        "voice_speed":      "+0%",
        "pre_silence":      0.0,
        "post_silence":     100.0
    }
    while len(overrides) < len(groups):
        overrides.append(default_override.copy())

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines    = []
    offset_threshold = 0.05  # seconds

    for idx, group in enumerate(groups):
        start_s = group[0].start.ordinal / 1000
        end_s   = group[-1].end.ordinal   / 1000
        seg_dur = end_s - start_s

        orig = " ".join(s.text for s in group)
        ovr  = overrides[idx]
        fr_text = ovr["final_translation"] or orig
        rate    = ovr["voice_speed"]
        pre_ms  = ovr["pre_silence"]
        post_ms = ovr["post_silence"]
        total_ms = int(seg_dur * 1000)

        # allocate content time (subtract pre & post)
        content_ms = max(0, total_ms - int(pre_ms) - int(post_ms))

        # split into phrases & weights
        phrases = split_french_phrases(fr_text)
        weights = calculate_phrase_weights(orig, phrases) if phrases else []

        # synth & adjust each phrase
        phrase_audios = []
        for i, ph in enumerate(phrases):
            dur = content_ms * weights[i] / 1000.0
            tmp = os.path.join(tempfile.gettempdir(), f"tmp_{idx}_{i}.mp3")
            try:
                await synthesize_phrase_edge_hybrid(ph, tmp, voice="fr-FR-DeniseNeural", rate=rate)
                aud = AudioSegment.from_mp3(tmp)
                aud = adjust_audio_duration(aud, dur)
                phrase_audios.append(aud)
            except Exception as e:
                # A failed phrase is skipped; the segment is still assembled.
                print(f"[Warning] phrase {i} failed: {e}")
            finally:
                if os.path.exists(tmp):
                    os.remove(tmp)

        # assemble with pre & post silence
        seg_audio = AudioSegment.silent(duration=pre_ms)
        for aud in phrase_audios:
            seg_audio += aud
        seg_audio += AudioSegment.silent(duration=post_ms)

        # trim to at most total_ms (slicing never lengthens the audio)
        seg_audio = seg_audio[:total_ms]

        # if the segment does not fill its slot, correct the duration
        gen_dur = seg_audio.duration_seconds
        diff = seg_dur - gen_dur
        if abs(diff) > offset_threshold:
            if gen_dur > 0 and seg_dur > 0:
                # speed < 1 stretches the audio, speed > 1 compresses it
                factor = gen_dur / seg_dur
                print(f"[Debug] Segment {idx+1}: adjusting speed factor={factor:.3f}")
                seg_audio = change_playback_speed(seg_audio, factor)
            else:
                print(f"[Warning] Segment {idx+1}: no audio generated, filling the slot with silence.")
                seg_audio = AudioSegment.silent(duration=max(0, total_ms))

        # place at the correct start in the combined track
        start_ms = int(start_s * 1000)
        if len(combined_audio) < start_ms:
            combined_audio += AudioSegment.silent(duration=start_ms - len(combined_audio))
        combined_audio += seg_audio

        # log
        debug_lines.append(
            f"Segment {idx+1} ({start_s:.2f}-{end_s:.2f}s): "
            f"pre={pre_ms}ms, post={post_ms}ms, speed={rate}, phrases={phrases}\n\n"
        )

    # write debug log & export
    with open(debug_log_path, "w", encoding="utf-8") as df:
        df.write("Translation Debug Log\n\n")
        df.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")
    return output_audio_path





# ============== Merge Audio and Video Function ==============
def merge_audio_video():
    """
    Replace the audio of ``input_video`` with ``translated_audio`` and write
    the result to ``output_video`` (libx264 video, AAC audio).

    If the translated audio is shorter than the video, a silence-padded copy
    is written to ``temp_full_audio.wav`` in ``output_dir`` and used instead.
    Every moviepy clip opened here is closed again, including on failure, so
    the underlying reader processes and file handles are released.
    """
    opened_clips = []
    try:
        video = VideoFileClip(input_video)
        opened_clips.append(video)
        audio = AudioFileClip(translated_audio)
        opened_clips.append(audio)
        if audio.duration < video.duration:
            extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
            audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
            audio_seg = AudioSegment.from_file(translated_audio, format="wav")
            full_audio = audio_seg + extra_silence
            full_audio.export(audio_path_temp, format="wav")
            audio = AudioFileClip(audio_path_temp)
            opened_clips.append(audio)
        dubbed = video.set_audio(audio)
        dubbed.write_videofile(
            output_video,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile="temp-audio.m4a",
            remove_temp=True,
            threads=4
        )
    finally:
        for clip in opened_clips:
            clip.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """Run the dubbing pipeline once, announcing each stage on stdout.

    Stages, in order: audio extraction, transcription, English subtitle
    export, French audio generation driven by the review file, and the final
    audio/video merge. All paths come from module-level configuration.
    """
    print("Extracting audio...")
    wav_path = extract_audio()

    print("Transcribing audio...")
    _detected_language, transcript = transcribe(wav_path)

    print("Generating English subtitles...")
    generate_subtitle_file(transcript, subtitle_file_en)

    print("Generating French audio with synchronization and manual overrides...")
    await async_generate_translated_audio_with_sync_using_review(
        subtitle_file_en,
        translated_audio,
        debug_log_file,
        review_file,
    )

    print("Merging audio and video...")
    merge_audio_video()

    print(f"Process completed! Output video: {output_video}")

# Script entry point: drive the asynchronous pipeline to completion.
if __name__ == "__main__":
    asyncio.run(async_main())


✅ ffmpeg found at: C:\ffmpeg\bin\ffmpeg.EXE
Extracting audio...
Transcribing audio...
Detected language: en
Generating English subtitles...
Generating French audio with synchronization and manual overrides...
✅ Review file created at: 4.2.4_Configuration de la solution_Avr_10_Latest_run_20250419_084520\translation_review.txt  (split into 28 segments)
Parsed review overrides:
  Segment 1: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 2: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 3: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 4: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 5: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 6: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 7: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 8: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 9: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 10: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 11: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 12: pre=0.0ms, post=100.0ms, speed=-10%
  Segment 13: pre=0.0ms, post=1

                                                                        

MoviePy - Done.
Moviepy - Writing video 4.2.4_Configuration de la solution_Avr_10_Latest_run_20250419_084520\4.2.4_Configuration de la solution_Avr_10_Latest-french.mp4



                                                                   

Moviepy - Done !
Moviepy - video ready 4.2.4_Configuration de la solution_Avr_10_Latest_run_20250419_084520\4.2.4_Configuration de la solution_Avr_10_Latest-french.mp4
Process completed! Output video: 4.2.4_Configuration de la solution_Avr_10_Latest_run_20250419_084520\4.2.4_Configuration de la solution_Avr_10_Latest-french.mp4


OPENAI

In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import random

# NOTE(review): presumably applied so asyncio.run() works inside an already
# running notebook event loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

# ----- Configuration -----
# Fail fast when the ffmpeg executable cannot be located on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video; every output file name below is derived from its base name.
input_video = "to translate/4.2.4_Configuration de la solution_Avr_10_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
# The timestamp makes each run write into its own directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Model size string handed to WhisperModel in transcribe().
model_size = "small"

# Paths
# All artefacts of one run live inside output_dir.
os.makedirs(output_dir, exist_ok=True)
extracted_audio = os.path.join(output_dir, f"{base_name}-extracted-audio.wav")
subtitle_file = os.path.join(output_dir, f"{base_name}-english.srt")
review_file = os.path.join(output_dir, "translation_review.txt")
translated_audio = os.path.join(output_dir, f"{base_name}-french.wav")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")
output_video = os.path.join(output_dir, f"{base_name}-french.mp4")

# ============== Audio extraction & transcription ==============
def extract_audio():
    """
    Extract the audio track of ``input_video`` into a 16 kHz mono WAV file.

    The output path is the module-level ``extracted_audio``; an existing file
    is overwritten.

    Returns:
        The path of the written WAV file (``extracted_audio``).

    Raises:
        ffmpeg.Error: re-raised after reporting when ffmpeg fails.
    """
    stream = (
        ffmpeg
        .input(input_video)
        .output(extracted_audio, ac=1, ar=16000)
        .overwrite_output()
    )
    try:
        # stderr is deliberately not captured so ffmpeg's own diagnostics
        # are printed directly while it runs.
        stream.run()
    except ffmpeg.Error as e:
        print("⚠️ ffmpeg failed to extract audio. ffmpeg stderr output below:")
        stderr = e.stderr
        if isinstance(stderr, bytes):
            print(stderr.decode('utf-8', errors='replace'))
        elif stderr is not None:
            print(stderr)
        else:
            # Nothing was captured, so there is nothing to decode; the
            # diagnostics were already emitted by ffmpeg itself.
            print("(stderr was not captured; see the ffmpeg output above)")
        raise
    return extracted_audio

class SubRipTimeConverter:
    """Convert a time offset in seconds into a ``pysrt.SubRipTime``."""

    @staticmethod
    def _split_seconds(seconds: float):
        """Break *seconds* into ``(hours, minutes, seconds, milliseconds)``.

        The value is rounded to whole milliseconds first and then split with
        integer arithmetic, so each component stays inside its natural range
        (minutes and seconds below 60, milliseconds below 1000).
        """
        total_ms = int(round(seconds * 1000))
        total_s, ms = divmod(total_ms, 1000)
        total_m, s = divmod(total_s, 60)
        h, m = divmod(total_m, 60)
        return h, m, s, ms

    @staticmethod
    def to_subrip(seconds: float) -> "pysrt.SubRipTime":
        """Return the ``pysrt.SubRipTime`` for an offset given in seconds."""
        h, m, s, ms = SubRipTimeConverter._split_seconds(seconds)
        return pysrt.SubRipTime(hours=h, minutes=m, seconds=s, milliseconds=ms)

def generate_subtitles(segments, path):
    """Write *segments* to *path* as a UTF-8 SubRip file and return *path*.

    Each segment is a mapping with ``'start'`` and ``'end'`` (passed to
    ``SubRipTimeConverter.to_subrip``) and ``'text'``. Items are numbered
    from 1 in input order.
    """
    to_time = SubRipTimeConverter.to_subrip
    srt = pysrt.SubRipFile()
    for number, segment in enumerate(segments, start=1):
        srt.append(
            pysrt.SubRipItem(
                index=number,
                start=to_time(segment['start']),
                end=to_time(segment['end']),
                text=segment['text'],
            )
        )
    srt.save(path, encoding='utf-8')
    return path

async def transcribe(audio_path):
    """Transcribe *audio_path* with a CPU int8 Whisper model.

    Prints the detected language, then returns ``(language, segments)``
    where each segment is a dict with ``'start'``, ``'end'`` and the stripped
    ``'text'``.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    results, info = whisper.transcribe(audio_path, beam_size=5)
    print(f"Detected language: {info.language}")
    transcript = [
        {'start': piece.start, 'end': piece.end, 'text': piece.text.strip()}
        for piece in results
    ]
    return info.language, transcript

# ============== Review file grouping & splitting ==============
def split_long_groupsO(groups, max_secs):
    """Split groups of subtitle items that span more than *max_secs* seconds.

    Each group is a list of items exposing ``.text`` and
    ``.start.ordinal`` / ``.end.ordinal`` in milliseconds. Groups within the
    limit are passed through unchanged. Longer groups are cut each time the
    running chunk reaches *max_secs*: at the last item of the chunk whose
    text ends in ``.``, ``,``, ``!`` or ``?`` when there is one, otherwise at
    the current item. Empty groups are dropped.

    Returns:
        A new list of groups; the input list is not modified.
    """
    new = []
    safe = re.compile(r"[.,!?]$")
    for grp in groups:
        if not grp:
            continue
        st = grp[0].start.ordinal / 1000
        en = grp[-1].end.ordinal / 1000
        if en - st <= max_secs:
            new.append(grp)
            continue
        temp, ts, last_safe = [], st, None
        for item in grp:
            temp.append(item)
            if safe.search(item.text.strip()):
                # Remember the position inside `temp` (not inside `grp`):
                # after the first cut the two indexings no longer coincide.
                last_safe = len(temp) - 1
            now = item.end.ordinal / 1000
            if now - ts >= max_secs:
                if last_safe is not None:
                    new.append(temp[:last_safe + 1])
                    temp = temp[last_safe + 1:]
                else:
                    new.append(temp)
                    temp = []
                # The next chunk is timed from its first item, or from the
                # cut point when nothing was carried over.
                ts = temp[0].start.ordinal / 1000 if temp else now
                last_safe = None
        if temp:
            new.append(temp)
    return new

def enforce_punctuation_boundariesO(groups):
    """Merge groups forward until each one ends on ``.``, ``,``, ``!`` or ``?``.

    A group whose last item's text does not end in one of those characters is
    prepended to the following group. The final group is emitted as-is since
    there is nothing left to merge it into.

    Returns:
        A new list of groups; neither *groups* nor its elements are modified.
    """
    safe = re.compile(r"[.,!?]$")
    out = []
    pending = []  # items carried over from groups that ended mid-phrase
    last_index = len(groups) - 1
    for i, g in enumerate(groups):
        merged = pending + g
        if i == last_index or safe.search(merged[-1].text.strip()):
            out.append(merged)
            pending = []
        else:
            # Always moving on to the next group guarantees termination.
            pending = merged
    return out



def split_long_groups(groups, max_group_duration_secs):
    """
    Break up groups of SubRipItems that last longer than the given limit.

    A group whose span (last end minus first start) is within
    max_group_duration_secs is kept unchanged. A longer group is walked item
    by item; whenever the running chunk reaches the limit it is cut at the
    last item of that chunk whose text ends in ``.``, ``,``, ``!`` or ``?``.
    If the chunk holds no such item, it is cut at the current item instead.
    Empty groups are dropped.

    Returns a new list of groups; the input list is left untouched.
    """
    new_groups = []
    safe_re = re.compile(r"[.,!?]$")
    for group in groups:
        if not group:
            continue
        start_s = group[0].start.ordinal / 1000
        end_s   = group[-1].end.ordinal   / 1000
        total   = end_s - start_s

        if total <= max_group_duration_secs:
            new_groups.append(group)
            continue

        chunk = []
        chunk_start = start_s
        safe_pos = None  # index into `chunk` of its last punctuation-ending item
        for item in group:
            chunk.append(item)
            if safe_re.search(item.text.strip()):
                # Track the position relative to the current chunk; the
                # group-level index stops matching after the first split.
                safe_pos = len(chunk) - 1

            current_end = item.end.ordinal / 1000
            if (current_end - chunk_start) < max_group_duration_secs:
                continue

            if safe_pos is not None:
                # split at last safe punctuation
                new_groups.append(chunk[: safe_pos + 1])
                chunk = chunk[safe_pos + 1 :]
            else:
                # no safe break, just split here
                new_groups.append(chunk)
                chunk = []
            chunk_start = chunk[0].start.ordinal / 1000 if chunk else current_end
            safe_pos = None

        if chunk:
            new_groups.append(chunk)

    return new_groups

def enforce_punctuation_boundaries(groups):
    """
    Ensure each group’s last subtitle ends in .,!? or comma.
    If not, merge it with the next group (and repeat) until it does.

    The last group is kept even when it does not end in punctuation, because
    there is no following group to merge it into. The input list and its
    groups are not modified; a new list is returned.
    """
    safe_re = re.compile(r"[.,!?]$")
    fixed = []
    carry = []  # items of preceding groups still waiting for a safe ending
    for position, group in enumerate(groups):
        candidate = carry + group
        is_final = position == len(groups) - 1
        if is_final or safe_re.search(candidate[-1].text.strip()):
            fixed.append(candidate)
            carry = []
        else:
            # merge into next; the loop always advances, so it terminates
            carry = candidate
    return fixed


# ============== Parse overrides ==============
def parse_review_overrides(review_path):
    """Read the per-segment overrides from a translation review file.

    The file is split on the dashed separator line. A block contributes one
    entry when at least one of its lines starts with a recognised key
    (``**Final Translation:**``, ``**Voice Speed:**``, ``**Pre-Silence:**``,
    ``**Post-Silence:**``). Blocks without any key line, such as the
    instructions header at the top of the file, are skipped so that entry
    ``i`` corresponds to segment ``i + 1``.

    Returns:
        A list of dicts with ``final_translation`` (``None`` when absent),
        ``voice_speed`` (default ``'+0%'``), ``pre_silence`` (default 0) and
        ``post_silence`` (default 100), the last two as ints. A silence value
        that is not a number keeps its default.
    """
    separator = '----------------------------------------------------------------'
    with open(review_path, 'r', encoding='utf-8') as fh:
        blocks = [b.strip() for b in fh.read().split(separator) if b.strip()]

    def as_number(raw, fallback):
        # A mistyped value in the hand-edited file should not abort the run.
        try:
            return float(raw.strip())
        except ValueError:
            return fallback

    out = []
    for blk in blocks:
        ft, vs, pre, post = None, '+0%', 0, 100
        has_key = False
        for ln in blk.splitlines():
            if ln.startswith('**Final Translation:**'):
                ft = ln.split('**Final Translation:**', 1)[1].strip()
            elif ln.startswith('**Voice Speed:**'):
                vs = ln.split('**Voice Speed:**', 1)[1].strip()
            elif ln.startswith('**Pre-Silence:**'):
                pre = as_number(ln.split('**Pre-Silence:**', 1)[1], pre)
            elif ln.startswith('**Post-Silence:**'):
                post = as_number(ln.split('**Post-Silence:**', 1)[1], post)
            else:
                continue
            has_key = True
        if not has_key:
            # Header / free-text block: not a segment, must not shift indices.
            continue
        out.append({
            'final_translation': ft,
            'voice_speed': vs,
            'pre_silence': int(pre),
            'post_silence': int(post)
        })
    print(f"Parsed {len(out)} overrides")
    return out

# ============== Phrase splitting & weights ==============
def split_french_phrases(text):
    """Split *text* into sentences at ``.``, ``!`` or ``?`` followed by a capital.

    A boundary is whitespace that follows sentence-ending punctuation and
    precedes an uppercase letter. Accented capitals (``É``, ``À``, ``Ç``,
    ``Œ``, ...) count as uppercase so that French sentences starting with
    them are split too. Empty pieces are dropped.
    """
    boundary = r"(?<=[.!?])\s+(?=[A-ZÀ-ÖØ-ÞŒ])"
    return [p.strip() for p in re.split(boundary, text) if p.strip()]

def calculate_phrase_weights(orig, phrases):
    """Return each phrase's share of the total word count of *phrases*.

    Words are whitespace-separated tokens. When no phrase contains a word,
    every phrase gets the same weight ``1 / len(phrases)``. *orig* is accepted
    but not used.
    """
    word_counts = [len(phrase.split()) for phrase in phrases]
    word_total = sum(word_counts)
    if word_total > 0:
        return [n / word_total for n in word_counts]
    return [1 / len(phrases) for _ in word_counts]

# ============== Robust Edge-TTS ==============
async def robust_synthesize_phrase(text, output_path, voice="fr-FR-DeniseNeural", rate="+0%", max_retries=5):
    """Synthesize *text* with Edge TTS into a WAV file, retrying on failure.

    Each attempt saves MP3 audio to ``output_path + ".tmp.mp3"``, converts it
    to WAV at *output_path* and removes the temporary file. A failed attempt
    is followed by an exponentially growing, jittered delay before the next
    one. If every attempt fails, one second of silence is written to
    *output_path* instead, so callers always find a file there.

    Args:
        text: Phrase to speak.
        output_path: Destination WAV path.
        voice: Edge TTS voice name.
        rate: Edge TTS speaking-rate string such as ``"+0%"``.
        max_retries: Maximum number of attempts.
    """
    tmp = output_path + ".tmp.mp3"
    for attempt in range(1, max_retries+1):
        try:
            comm = edge_tts.Communicate(text=text, voice=voice, rate=rate)
            print(f"[TTS] Attempt {attempt}: {text[:30]}…")
            # The 30 s budget used to sit on an aiohttp session that was never
            # handed to edge_tts, so it had no effect; enforce it directly.
            await asyncio.wait_for(comm.save(tmp), timeout=30)
            seg = AudioSegment.from_mp3(tmp)
            seg.export(output_path, format='wav')
            return
        except Exception as e:
            if attempt == max_retries:
                # No further attempt follows, so waiting would only add delay.
                print(f"[TTS] Fail #{attempt}: {e} – giving up")
                break
            delay = 2**(attempt-1) + random.random()
            print(f"[TTS] Fail #{attempt}: {e} – retry in {delay:.1f}s")
            await asyncio.sleep(delay)
        finally:
            # The temporary MP3 may not exist if synthesis failed early.
            try:
                os.remove(tmp)
            except OSError:
                pass
    # fallback
    AudioSegment.silent(duration=1000).export(output_path, format='wav')

async def async_generate_translated_audio_with_sync_using_review(
    subtitle_srt, out_audio, debug_log, review_txt
):
    """Build the translated audio track aligned to the subtitle timeline.

    Steps:
      1. (Re)generate the review file for *subtitle_srt* and obtain the
         subtitle groups, then parse the per-segment overrides from it.
      2. For every group, synthesize each phrase of the final translation,
         pad/trim it to its word-count share of the segment, surround it with
         the requested pre/post silence and cut the result to the segment
         length.
      3. Re-time the segment when its length still differs from the target by
         more than 0.05 s, then append it to the combined track at the
         group's start offset (padding with silence if the track is shorter).
      4. Export the combined track to *out_audio* (WAV) and write one line
         per segment to *debug_log*.

    Args:
        subtitle_srt: Path of the source SRT file.
        out_audio: Destination path of the combined WAV file.
        debug_log: Destination path of the text debug log.
        review_txt: Path of the translation review file.
    """
    groups    = generate_translation_review_file(subtitle_srt, review_txt)
    overrides = parse_review_overrides(review_txt)
    default   = {'final_translation':None,'voice_speed':'+0%','pre_silence':0,'post_silence':100}
    # Pad with defaults so every group has an entry to index below.
    while len(overrides)<len(groups): overrides.append(default.copy())

    combined = AudioSegment.silent(duration=0)
    debug    = []
    TH       = 0.05  # tolerated length mismatch in seconds before re-timing

    for i,grp in enumerate(groups):
        st = grp[0].start.ordinal/1000
        en = grp[-1].end.ordinal/1000
        dur_ms = int((en-st)*1000)
        orig = " ".join(x.text for x in grp)
        ovr  = overrides[i]
        # Fall back to the source text when no translation was provided.
        fr   = ovr['final_translation'] or orig
        rate = ovr['voice_speed']
        pre  = ovr['pre_silence']
        post = ovr['post_silence']
        # Time left for speech once the surrounding silences are reserved.
        content_ms = max(0, dur_ms-pre-post)

        phrases = split_french_phrases(fr)
        weights = calculate_phrase_weights(orig, phrases)

        seg_audio = AudioSegment.silent(pre)
        for j,ph in enumerate(phrases):
            tmp = os.path.join(tempfile.gettempdir(), f"tts_{i}_{j}.wav")
            await robust_synthesize_phrase(ph, tmp, rate=rate)
            part = AudioSegment.from_wav(tmp)
            part = adjust_audio_duration(part, content_ms*weights[j]/1000)
            seg_audio += part
            os.remove(tmp)
        seg_audio += AudioSegment.silent(post)
        seg_audio = seg_audio[:dur_ms]

        # speed adjust
        gd = seg_audio.duration_seconds
        td = en-st
        # An empty segment cannot be re-timed; skipping it also avoids a
        # division by zero when computing the factor.
        if gd > 0 and abs(td-gd)>TH:
            factor = td/gd
            seg_audio = change_playback_speed(seg_audio, factor)

        offset = int(st*1000)
        if len(combined)<offset:
            combined += AudioSegment.silent(offset-len(combined))
        combined += seg_audio
        debug.append(f"Segment {i+1}: {st:.2f}-{en:.2f}s pre={pre} post={post} rate={rate}\n")

    combined.export(out_audio, format='wav')
    with open(debug_log,'w',encoding='utf-8') as df:
        df.write("Debug Log\n\n"+"".join(debug))

# ============== Helpers ==============
def adjust_audio_duration(audio, target_dur_s):
    """Pad or trim *audio* towards *target_dur_s* seconds.

    If the clip is more than 0.1 s too short, silence is appended; if it is
    more than 0.1 s too long, it is cut at the target length. Within that
    tolerance the clip is returned unchanged.
    """
    delta_s = target_dur_s - audio.duration_seconds
    if delta_s > 0.1:
        padding = AudioSegment.silent(duration=int(delta_s * 1000))
        return audio + padding
    if delta_s < -0.1:
        keep_ms = int(target_dur_s * 1000)
        return audio[:keep_ms]
    return audio

def change_playback_speed(sound, speed=1.0):
    """Return *sound* re-timed by the factor *speed*.

    The raw samples are re-labelled with a frame rate scaled by *speed* and
    the result is then converted back to the original frame rate.
    """
    original_rate = sound.frame_rate
    relabelled = sound._spawn(
        sound.raw_data,
        overrides={'frame_rate': int(original_rate * speed)},
    )
    return relabelled.set_frame_rate(original_rate)

def merge_audio_video(video_in, audio_in, video_out):
    """Write *video_in* with its audio replaced by *audio_in* to *video_out*.

    The output is encoded with libx264 video and AAC audio. Both source clips
    are closed before returning, also when opening or encoding fails, so
    their reader handles are released.
    """
    vid = VideoFileClip(video_in)
    try:
        aud = AudioFileClip(audio_in)
        try:
            dubbed = vid.set_audio(aud)
            dubbed.write_videofile(video_out, codec='libx264', audio_codec='aac')
        finally:
            aud.close()
    finally:
        vid.close()


import re
import pysrt
from deep_translator import GoogleTranslator

def generate_translation_review_file(
    source_path,
    review_file_path,
    from_lang: str = "en",
    to_lang:   str = "fr",
    max_group_duration_secs: float = 15.0,
    pause_for_review: bool = True
):
    """
    Create the editable translation review file for an SRT file.

    The subtitles are grouped into sentences (a group closes on an item whose
    text ends in ., ! or ?), over-long groups are split with
    split_long_groups, and enforce_punctuation_boundaries is applied. The
    joined text of every group is machine-translated in one batch, falling
    back to one call per group when translate_batch raises TypeError. One
    block per group is then written with the original text, the automatic
    translation (also pre-filled as the final translation) and default voice
    speed / silence values.

    When pause_for_review is true the function blocks on input() so the file
    can be edited before the caller continues.

    Returns the list of subtitle groups.
    """
    separator_line = "----------------------------------------------------------------\n\n"

    translator = GoogleTranslator(source=from_lang, target=to_lang)
    subtitles = pysrt.open(source_path)
    ends_sentence = re.compile(r"[.!?]\s*$")

    # Sentence-level grouping: close the running group on sentence-ending text.
    groups = []
    running = []
    for entry in subtitles:
        running.append(entry)
        if not ends_sentence.search(entry.text):
            continue
        groups.append(running)
        running = []
    if running:
        groups.append(running)

    groups = enforce_punctuation_boundaries(
        split_long_groups(groups, max_group_duration_secs)
    )

    # Source text per group, translated together when the API allows it.
    originals = [" ".join(entry.text for entry in group) for group in groups]
    try:
        auto_translations = translator.translate_batch(batch=originals)
    except TypeError:
        # translate_batch rejected the call; translate one group at a time
        auto_translations = [translator.translate(text=source) for source in originals]

    with open(review_file_path, "w", encoding="utf-8") as review:
        review.writelines([
            "Translation Review File\n",
            "Please update the French text in the **Final Translation:** field below.\n",
            "DO NOT change the keys (**Final Translation:**, **Voice Speed:**, **Pre-Silence:**, **Post-Silence:**).\n",
            separator_line,
        ])

        numbered = enumerate(zip(groups, originals, auto_translations), start=1)
        for number, (group, source_text, translated) in numbered:
            begins = group[0].start.ordinal / 1000
            finishes = group[-1].end.ordinal / 1000
            review.writelines([
                f"Segment {number} (start: {begins:.2f}s, end: {finishes:.2f}s):\n",
                f"**Original:** {source_text}\n",
                f"**Auto Translated:** {translated}\n",
                f"**Final Translation:** {translated}\n",
                "**Voice Speed:** +0%\n",
                "**Pre-Silence:** 0\n",
                "**Post-Silence:** 100\n",
                separator_line,
            ])

    print(f"✅ Review file created at: {review_file_path}  (split into {len(groups)} segments)")
    if pause_for_review:
        input("Type 'Y' when ready to continue: ")
    return groups



# ============== Main ==============
async def main():
    """Run the dubbing pipeline: extract, transcribe, review, synthesize, merge.

    The translation review file is created, and the pause for manual edits
    happens, inside ``async_generate_translated_audio_with_sync_using_review``.
    It must not also be generated here: a second generation rewrites the file
    from the automatic translation and discards edits made during the first
    pause.
    """
    print("1️⃣ Extracting audio…")
    wav = extract_audio()
    print("2️⃣ Transcribing…")
    # NOTE(review): the detected language is not forwarded; the review file
    # is generated with that function's default source language.
    _lang, segs = await transcribe(wav)
    print("3️⃣ Writing SRT…")
    generate_subtitles(segs, subtitle_file)
    print("4️⃣/5️⃣ Preparing translation review, then generating translated audio…")
    # review file will pause here for your edits
    await async_generate_translated_audio_with_sync_using_review(
        subtitle_file, translated_audio, debug_log_file, review_file
    )
    print("6️⃣ Merging with video…")
    merge_audio_video(input_video, translated_audio, output_video)
    print(f"✅ Done: {output_video}")

# Script entry point: run the asynchronous pipeline to completion.
if __name__ == '__main__':
    asyncio.run(main())


✅ ffmpeg found at: C:\ffmpeg\bin\ffmpeg.EXE
1️⃣ Extracting audio…
2️⃣ Transcribing…
Detected language: en
3️⃣ Writing SRT…
4️⃣ Preparing translation review…


"to translate/4.2.4_Configuration de la solution_Avr_10_Latest.mp4"

In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
import random

# NOTE(review): presumably applied so asyncio.run() works inside an already
# running notebook event loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

# ----- Configuration -----
# Fail fast when the ffmpeg executable cannot be located on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video; the run directory name is derived from its base name.
input_video = "to translate/4.2.4_Configuration de la solution_Avr_10_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
# The timestamp makes each run write into its own directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Model size string handed to WhisperModel in transcribe().
model_size = "small"
# NOTE(review): not referenced in the visible part of this cell — confirm use.
update_existing = True

# For this version we rely on cloud-based Edge TTS.
# NOTE(review): not referenced in the visible part of this cell — confirm use.
USE_EDGE_TTS = True

# Files and paths
# All artefacts of one run live inside output_dir.
os.makedirs(output_dir, exist_ok=True)
# Same expression as base_name above; used for the file names below.
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """Extract the audio of ``input_video`` to ``extracted_audio``.

    The output is mono at 16 kHz and replaces any existing file. ffmpeg's
    stdout and stderr are captured; on failure both are printed and the
    ``ffmpeg.Error`` is re-raised. Returns the output path on success.
    """
    try:
        pipeline = ffmpeg.input(input_video)
        pipeline = pipeline.output(extracted_audio, ac=1, ar=16000)
        pipeline = pipeline.overwrite_output()
        pipeline.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        print("STDOUT:", err.stdout.decode("utf8"))
        print("STDERR:", err.stderr.decode("utf8"))
        raise
    else:
        return extracted_audio

def transcribe(audio_path):
    """Transcribe *audio_path* with a CPU int8 Whisper model.

    Prints the detected language and returns ``(language, segments)``, where
    each segment is a dict with ``"start"``, ``"end"`` and stripped ``"text"``.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    pieces, info = whisper.transcribe(audio_path, beam_size=5)
    detected = info.language
    print(f"Detected language: {detected}")
    transcript = [
        {"start": piece.start, "end": piece.end, "text": piece.text.strip()}
        for piece in pieces
    ]
    return detected, transcript

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert an offset in seconds to a ``pysrt.SubRipTime``.

    The fractional part of the second is truncated to whole milliseconds.
    """
    whole_hours, within_hour = divmod(seconds, 3600)
    whole_minutes, within_minute = divmod(within_hour, 60)
    whole_seconds = int(within_minute)
    millis = int((within_minute - whole_seconds) * 1000)
    return pysrt.SubRipTime(
        hours=int(whole_hours),
        minutes=int(whole_minutes),
        seconds=whole_seconds,
        milliseconds=millis,
    )

def generate_subtitle_file(segments, output_path):
    """Save *segments* as a UTF-8 SubRip file at *output_path* and return it.

    Each segment is a mapping with ``"start"``, ``"end"`` (converted with
    ``time_to_subrip``) and ``"text"``; items are numbered from 1.
    """
    srt = pysrt.SubRipFile()
    for number, entry in enumerate(segments, start=1):
        srt.append(
            pysrt.SubRipItem(
                index=number,
                start=time_to_subrip(entry["start"]),
                end=time_to_subrip(entry["end"]),
                text=entry["text"],
            )
        )
    srt.save(output_path, encoding="utf-8")
    return output_path


import re
import pysrt
from deep_translator import GoogleTranslator

def split_long_groups(groups, max_group_duration_secs):
    """
    For each group (a list of SubRipItems), if its span (end - start)
    exceeds max_group_duration_secs, break it into chunks. A chunk is closed
    when it reaches the limit: at its last subtitle that ends in .,!? (if
    there is one), otherwise at the current subtitle.

    Groups within the limit are kept unchanged and empty groups are dropped.
    Returns a new list; the input is not modified.
    """
    safe_re = re.compile(r"[.,!?]$")
    new_groups = []
    for group in groups:
        if not group:
            continue
        start_time = group[0].start.ordinal / 1000.0
        span = group[-1].end.ordinal / 1000.0 - start_time
        if span <= max_group_duration_secs:
            new_groups.append(group)
            continue

        temp, last_safe = [], None
        for item in group:
            temp.append(item)
            # mark safe cut point, as a position inside `temp`: the index in
            # `group` no longer lines up with `temp` after the first split
            if safe_re.search(item.text.strip()):
                last_safe = len(temp) - 1

            item_end = item.end.ordinal / 1000.0
            if item_end - start_time >= max_group_duration_secs:
                # split here
                if last_safe is not None:
                    # cut at last safe boundary
                    new_groups.append(temp[: last_safe + 1])
                    temp = temp[last_safe + 1:]
                else:
                    new_groups.append(temp)
                    temp = []
                # reset for remainder; when nothing is carried over the next
                # chunk is timed from the cut point, otherwise every later
                # item would immediately exceed the limit again
                start_time = temp[0].start.ordinal / 1000.0 if temp else item_end
                last_safe = None

        if temp:
            new_groups.append(temp)
    return new_groups

def enforce_punctuation_boundaries(groups):
    """
    Ensure each group's last subtitle ends in .,!? or comma.
    If not, merge it into the next group until it does.

    The final group is emitted even without closing punctuation since no
    group follows it. A new list is returned and the input list is left
    unmodified.
    """
    safe_re = re.compile(r"[.,!?]$")
    fixed = []
    unfinished = []  # subtitles from earlier groups awaiting a safe ending
    remaining = len(groups)
    for group in groups:
        remaining -= 1
        unfinished = unfinished + group
        if remaining == 0 or safe_re.search(unfinished[-1].text.strip()):
            fixed.append(unfinished)
            unfinished = []
        # otherwise keep accumulating; each iteration consumes one group, so
        # the loop cannot spin on the same group
    return fixed

def generate_translation_review_file(
    source_path, review_file_path,
    from_lang="en", to_lang="fr",
    max_group_duration_secs: float = 15.0
):
    """
    Create the editable translation review file for an SRT file.

    1. Groups the subtitles by sentence (a group closes on text ending in
       ., ! or ?).
    2. Splits any group longer than max_group_duration_secs
       into shorter chunks at subtitle-item boundaries.
    3. Applies enforce_punctuation_boundaries to the groups.
    4. Translates each group and writes the review file: one block per
       (sub-)group with the original text, the automatic translation (also
       pre-filled as the final translation) and default voice speed,
       pre-silence and post-silence values.

    The silence fields are written as **Pre-Silence:** / **Post-Silence:**,
    the keys documented by parse_review_overrides, instead of a single
    **Silence Duration:** field that it does not read.

    Blocks on input() after writing so the file can be edited, then returns
    the list of groups.
    """
    # load and initial sentence‐based grouping
    subs       = pysrt.open(source_path)
    sentence_end = re.compile(r"[.!?]\s*$")
    groups, cur = [], []
    for s in subs:
        cur.append(s)
        if sentence_end.search(s.text):
            groups.append(cur)
            cur = []
    if cur:
        groups.append(cur)

    # split too‐long groups
    groups = split_long_groups(groups, max_group_duration_secs)
    # enforce safe punctuation at end
    groups = enforce_punctuation_boundaries(groups)

    # translate & write review template
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    with open(review_file_path, "w", encoding="utf-8") as f:
        f.write("Translation Review File\n")
        f.write("Please update the French text in the **Final Translation:** field below.\n")
        f.write("DO NOT change the keys (**Final Translation:**, **Voice Speed:**, **Pre-Silence:**, **Post-Silence:**).\n")
        f.write("----------------------------------------------------------------\n")

        for i, grp in enumerate(groups, 1):
            start_s = grp[0].start.ordinal / 1000
            end_s   = grp[-1].end.ordinal   / 1000
            orig    = " ".join(s.text for s in grp)
            auto_tr = translator.translate(text=orig)

            f.write(f"Segment {i} (start: {start_s:.2f}s, end: {end_s:.2f}s):\n")
            f.write(f"**Original:** {orig}\n")
            f.write(f"**Auto Translated:** {auto_tr}\n")
            f.write(f"**Final Translation:** {auto_tr}\n")
            f.write("**Voice Speed:** +0%\n")
            f.write("**Pre-Silence:** 0\n")
            f.write("**Post-Silence:** 100\n")
            f.write("----------------------------------------------------------------\n")

    print(f"✅ Review file created at: {review_file_path}  (split into {len(groups)} segments)")
    input("Type 'Y' when ready to continue: ")
    return groups




def parse_review_overrides(review_file_path):
    """
    Parse **Final Translation**, **Voice Speed**, **Pre-Silence**, **Post-Silence**.
    Returns a list of dicts, one per segment.

    The silence keys are accepted with either a plain ASCII hyphen or the
    non-breaking hyphen (U+2011) that this parser originally required. A
    **Silence Duration:** line is also accepted and read as the post-silence.
    NOTE(review): mapping "Silence Duration" to post-silence is inferred from
    its template default (100) matching the post-silence default — confirm.

    A block is kept only when it has both a final translation and a voice
    speed, which also skips the instructions header. Silence values are
    floats in the returned dicts (defaults 0.0 and 100.0); a value that is
    not a number keeps its default.
    """
    pre_keys = ("**Pre-Silence:**", "**Pre\u2011Silence:**")
    post_keys = ("**Post-Silence:**", "**Post\u2011Silence:**", "**Silence Duration:**")

    def number_after_key(line, fallback):
        # Every key ends in ":**", so the value is whatever follows the first one.
        try:
            return float(line.split(":**", 1)[1].strip())
        except ValueError:
            return fallback

    overrides = []
    with open(review_file_path, "r", encoding="utf-8") as fh:
        text = fh.read()
    blocks = [b.strip() for b in text.split("----------------------------------------------------------------") if b.strip()]

    for blk in blocks:
        ft = vs = None
        pre_ms = 0.0
        post_ms = 100.0

        for line in blk.splitlines():
            if line.startswith("**Final Translation:**"):
                ft = line.split("**Final Translation:**",1)[1].strip()
            elif line.startswith("**Voice Speed:**"):
                vs = line.split("**Voice Speed:**",1)[1].strip()
            elif line.startswith(pre_keys):
                pre_ms = number_after_key(line, 0.0)
            elif line.startswith(post_keys):
                post_ms = number_after_key(line, 100.0)

        if ft is not None and vs is not None:
            overrides.append({
                "final_translation": ft,
                "voice_speed":      vs,
                "pre_silence":      pre_ms,
                "post_silence":     post_ms
            })

    print("Parsed review overrides:")
    for idx, o in enumerate(overrides, 1):
        print(f"  Segment {idx}: pre={o['pre_silence']}ms, post={o['post_silence']}ms, speed={o['voice_speed']}")
    return overrides

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """Pad or trim *audio* towards *target_duration* seconds.

    A clip more than 0.1 s too short gets silence appended; a clip more than
    0.1 s too long has the excess removed from its end. Within that tolerance
    the clip is returned unchanged.
    """
    gap = target_duration - audio.duration_seconds
    if gap > 0.1:
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -0.1:
        excess_ms = abs(gap) * 1000
        return audio[:-int(excess_ms)]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """Split *text* into sentences at ``.``, ``!`` or ``?`` followed by a capital.

    A boundary is whitespace preceded by sentence-ending punctuation and
    followed by an uppercase letter. Accented capitals (``É``, ``À``, ``Ç``,
    ``Œ``, ...) are treated as uppercase so French sentences that begin with
    them are separated as well. Empty pieces are discarded.
    """
    phrases = re.split(r"(?<=[.!?])\s+(?=[A-ZÀ-ÖØ-ÞŒ])", text)
    return [phrase.strip() for phrase in phrases if phrase.strip()]

def calculate_phrase_weights(original_text, translated_phrases):
    """Return each phrase's share of the total word count of the phrases.

    Words are whitespace-separated tokens. When no phrase contains a word the
    weights are equal. An empty phrase list yields an empty list instead of
    dividing by zero. *original_text* is accepted but not used.
    """
    if not translated_phrases:
        return []
    fr_phrase_word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_fr_words = sum(fr_phrase_word_counts)
    if total_fr_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)
    return [count / total_fr_words for count in fr_phrase_word_counts]

# ============== TTS Functions: Edge TTS Only with Debug Logging ==============
async def robust_synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", max_retries: int = 5):
    """
    Synthesize speech using Edge TTS with robust retry logic.
    Detailed debug messages are printed for each attempt.
    Note: In Edge TTS v7.0.0 the 'session' parameter is not supported.

    Each attempt is limited to 30 seconds. A failed attempt is followed by an
    exponentially growing, jittered delay, except after the last attempt,
    which raises immediately.

    Args:
        phrase: Text to speak.
        output_path: File the synthesized audio is saved to.
        voice: Edge TTS voice name.
        rate: Edge TTS speaking-rate string such as "+0%".
        max_retries: Maximum number of attempts.

    Raises:
        Exception: when every attempt failed; chained to the last error.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            communicate = edge_tts.Communicate(
                text=phrase,
                voice=voice,
                rate=rate
            )
            print(f"[Debug] Attempt {attempt+1}: Synthesizing phrase: '{phrase}'")
            # The 30 s budget used to be set on an aiohttp session that was
            # never passed to edge_tts, so it never applied; enforce it here.
            await asyncio.wait_for(communicate.save(output_path), timeout=30)
            print(f"[Debug] Phrase synthesized successfully to {output_path}")
            return
        except Exception as e:
            last_error = e
            print(f"[Error] Attempt {attempt+1}/{max_retries} failed for phrase: '{phrase}'. Exception: {e}")
            if attempt + 1 >= max_retries:
                # Nothing left to retry: do not wait before reporting failure.
                break
            wait_time = 2 ** attempt + random.uniform(0, 1)
            print(f"[Debug] Retrying in {wait_time:.2f} seconds...")
            await asyncio.sleep(wait_time)
    raise Exception(f"Failed to synthesize phrase after {max_retries} attempts: {phrase}") from last_error

async def synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Synthesize *phrase* to *output_path* via ``robust_synthesize_phrase``."""
    return_value = await robust_synthesize_phrase(
        phrase,
        output_path,
        voice,
        rate,
    )
    del return_value  # the wrapped call's result is not propagated

# For backward compatibility:
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Older entry-point name kept for existing callers; forwards to ``synthesize_phrase``."""
    forwarded = (phrase, output_path, voice, rate)
    await synthesize_phrase(*forwarded)

def change_playback_speed(sound, speed=1.0):
    """Return *sound* re-timed by the factor *speed*.

    The raw samples are re-labelled with a frame rate multiplied by *speed*,
    and the result is converted back to the original frame rate.
    """
    base_rate = sound.frame_rate
    scaled_rate = int(base_rate * speed)
    return sound._spawn(
        sound.raw_data, overrides={"frame_rate": scaled_rate}
    ).set_frame_rate(base_rate)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_reviewOLD(subtitle_source_path, output_audio_path, debug_log_path, review_file_path):
    """
    Build the translated audio track, one subtitle group at a time.

    The review file is generated and parsed first. For every group the final
    translation is split into phrases, each phrase is synthesized and fitted to
    its share of the group's duration, and the group is appended to the
    combined track at the group's start time. A debug log is written to
    ``debug_log_path`` and the track is exported as WAV to ``output_audio_path``.

    Groups without a matching override fall back to the original text, a
    "+0%" voice speed and 100 ms of silence after each phrase.

    Returns:
        ``output_audio_path``.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)
    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []
    offset_threshold = 0.05  # seconds of mismatch tolerated before re-timing
    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start
        original_text = " ".join(sub.text for sub in group)
        if idx < len(overrides):
            override = overrides[idx]
            final_translation = override["final_translation"]
            voice_speed_override = override["voice_speed"]
            silence_duration_override = override["silence_duration"]
        else:
            final_translation = original_text
            voice_speed_override = "+0%"
            silence_duration_override = 100.0

        print(f"[Debug] Segment {idx+1} final translation: {final_translation}")
        french_phrases = split_french_phrases(final_translation)
        # No phrases means nothing to weigh; skip the helper so an empty
        # translation cannot trigger a division by zero.
        weights = calculate_phrase_weights(original_text, french_phrases) if french_phrases else []
        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")
            try:
                try:
                    await synthesize_phrase_edge_hybrid(phrase, temp_path, voice="fr-FR-DeniseNeural", rate=voice_speed_override)
                except Exception as e:
                    print(f"[Warning] Synthesis failed for phrase '{phrase}': {e}. Skipping this phrase.")
                    continue
                try:
                    audio = AudioSegment.from_mp3(temp_path)
                except Exception as e:
                    print(f"[Warning] Unable to load audio from {temp_path}: {e}. Skipping this phrase.")
                    continue
                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44 and audio:
                    try:
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        print(f"[Warning] Ignoring corrupted audio file: {temp_path}. Error: {e}")
                else:
                    print(f"[Warning] Missing or invalid file: {temp_path}")
            finally:
                # Always drop the temp file, including after a failed synthesis.
                if os.path.exists(temp_path):
                    os.remove(temp_path)
        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=silence_duration_override)
        group_audio = group_audio[:int(target_duration * 1000)]
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration
        # generated_duration is 0 when every phrase failed; there is nothing to
        # re-time then and dividing by it would crash the whole run.
        if generated_duration > 0 and abs(time_diff) > offset_threshold:
            # NOTE(review): group_audio was just truncated to target_duration,
            # so speed_factor is >= 1 here. Confirm that a factor above 1 in
            # change_playback_speed moves the clip toward target_duration
            # rather than shortening it further.
            speed_factor = target_duration / generated_duration
            print(f"[Debug] Segment {group[0].index} : adjusting speed, factor={speed_factor:.3f}")
            group_audio = change_playback_speed(group_audio, speed_factor)
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence
        combined_audio += group_audio
        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed:** {voice_speed_override}\n"
            f"**Silence Duration:** {silence_duration_override} ms\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration before adjustment:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)
    with open(debug_log_path, "w", encoding="utf-8") as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)
    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")
    return output_audio_path




async def async_generate_translated_audio_with_sync_using_review(
    subtitle_source_path, output_audio_path,
    debug_log_path, review_file_path
):
    """
    Build the translated audio track with per-segment pre/post silence.

    The review file is generated and parsed first. For every subtitle group
    the final translation is split into phrases, each phrase is synthesized
    and fitted to its share of the time left after the pre and post silences,
    and the segment is appended to the combined track at the group's start
    time. A debug log is written to ``debug_log_path`` and the track is
    exported as WAV to ``output_audio_path``.

    Groups without a matching override fall back to the original text, a
    "+0%" voice speed and no pre/post silence.

    Returns:
        ``output_audio_path``.
    """
    # 1) Build/write review file & parse overrides
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []
    offset_threshold = 0.05  # sec

    for idx, group in enumerate(groups):
        start_s = group[0].start.ordinal / 1000
        end_s = group[-1].end.ordinal / 1000
        seg_dur = end_s - start_s

        orig = " ".join(s.text for s in group)
        if idx < len(overrides):
            ovr = overrides[idx]
        else:
            # The review file may hold fewer usable segments than there are
            # groups; use neutral settings instead of raising IndexError.
            ovr = {
                "final_translation": orig,
                "voice_speed": "+0%",
                "pre_silence": 0,
                "post_silence": 0,
            }
        fr_text = ovr["final_translation"]
        rate = ovr["voice_speed"]
        pre_ms = ovr["pre_silence"]
        post_ms = ovr["post_silence"]
        total_ms = int(seg_dur * 1000)

        # allocate content time (subtract pre & post)
        content_ms = max(0, total_ms - int(pre_ms) - int(post_ms))

        # split into phrases & weights (nothing to weigh without phrases)
        phrases = split_french_phrases(fr_text)
        weights = calculate_phrase_weights(orig, phrases) if phrases else []

        # synth & adjust each phrase
        phrase_audios = []
        for i, ph in enumerate(phrases):
            dur = content_ms * weights[i] / 1000.0
            tmp = os.path.join(tempfile.gettempdir(), f"tmp_{idx}_{i}.mp3")
            try:
                await synthesize_phrase_edge_hybrid(ph, tmp, voice="fr-FR-DeniseNeural", rate=rate)
                aud = AudioSegment.from_mp3(tmp)
                aud = adjust_audio_duration(aud, dur)
                phrase_audios.append(aud)
            except Exception as e:
                print(f"[Warning] phrase {i} failed: {e}")
            finally:
                if os.path.exists(tmp):
                    os.remove(tmp)

        # assemble with pre & post silence
        seg_audio = AudioSegment.silent(duration=pre_ms)
        for aud in phrase_audios:
            seg_audio += aud
        seg_audio += AudioSegment.silent(duration=post_ms)

        # trim to at most total_ms (a shorter segment is not padded here)
        seg_audio = seg_audio[:total_ms]

        # if needed, apply global speed adjustment
        gen_dur = seg_audio.duration_seconds
        diff = seg_dur - gen_dur
        # gen_dur is 0 when the segment ended up empty; dividing by it would
        # abort the whole run, so leave an empty segment untouched.
        if gen_dur > 0 and abs(diff) > offset_threshold:
            # NOTE(review): seg_audio was just trimmed to total_ms, so factor
            # is >= 1 here. Confirm that a factor above 1 in
            # change_playback_speed moves the clip toward seg_dur rather than
            # shortening it further.
            factor = seg_dur / gen_dur
            seg_audio = change_playback_speed(seg_audio, factor)

        # place at the correct start in the combined track
        start_ms = int(start_s * 1000)
        if len(combined_audio) < start_ms:
            combined_audio += AudioSegment.silent(duration=start_ms - len(combined_audio))
        combined_audio += seg_audio

        # log
        debug_lines.append(
            f"Segment {idx+1} ({start_s:.2f}-{end_s:.2f}s): "
            f"pre={pre_ms}ms, post={post_ms}ms, speed={rate}, phrases={phrases}\n\n"
        )

    # write debug log & export
    with open(debug_log_path, "w", encoding="utf-8") as df:
        df.write("Translation Debug Log\n\n")
        df.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")
    return output_audio_path




# ============== Merge Audio and Video Function ==============
def merge_audio_video():
    """
    Replace the audio of ``input_video`` with ``translated_audio``.

    When the translated track is shorter than the video, a copy padded with
    trailing silence is written to ``temp_full_audio.wav`` in ``output_dir``
    and used instead. The result is encoded to ``output_video`` (H.264 video,
    AAC audio). Both clips are closed afterwards, also when encoding fails, so
    their readers are not left open.
    """
    video = VideoFileClip(input_video)
    try:
        audio = AudioFileClip(translated_audio)
        try:
            if audio.duration < video.duration:
                extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
                audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
                audio_seg = AudioSegment.from_file(translated_audio, format="wav")
                full_audio = audio_seg + extra_silence
                full_audio.export(audio_path_temp, format="wav")
                # Release the short clip before swapping in the padded one.
                audio.close()
                audio = AudioFileClip(audio_path_temp)
            dubbed = video.set_audio(audio)
            dubbed.write_videofile(
                output_video,
                codec="libx264",
                audio_codec="aac",
                temp_audiofile="temp-audio.m4a",
                remove_temp=True,
                threads=4
            )
        finally:
            audio.close()
    finally:
        video.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """Run the pipeline: extract, transcribe, subtitle, synthesize, merge."""
    print("Extracting audio...")
    wav_path = extract_audio()

    print("Transcribing audio (this may take a while)...")
    # transcribe() is a blocking call, so it is handed to a worker thread.
    transcription = await asyncio.to_thread(transcribe, wav_path)
    _detected_language, transcript = transcription

    print("Generating English subtitles...")
    generate_subtitle_file(transcript, subtitle_file_en)

    print("Generating French audio with synchronization and manual overrides...")
    await async_generate_translated_audio_with_sync_using_review(
        subtitle_file_en,
        translated_audio,
        debug_log_file,
        review_file,
    )

    print("Merging audio and video...")
    merge_audio_video()

    print(f"Process completed! Output video: {output_video}")

if __name__ == "__main__":
    # async_main() already extracts the audio, transcribes it and writes the
    # subtitle file before synthesizing and merging, and prints its own
    # progress. Running those steps here as well did the slow work twice.
    asyncio.run(async_main())

    print(f"Done! Output is {output_video}")



TESTING SSML

In [9]:
import asyncio
import edge_tts

async def test_ssml():
    """Send a hand-written SSML document to Edge TTS and save the audio as MP3.

    NOTE(review): the markup is passed through the ordinary ``text`` argument
    of ``edge_tts.Communicate``; confirm by listening to the output that it is
    interpreted as SSML rather than spoken or escaped as plain text.
    """
    # The SSML document, one element per entry.
    ssml_parts = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="fr-FR">',
        '<prosody rate="120%">Bonjour, comment allez-vous?</prosody>',
        '<break time="300ms"/>',
        '<prosody rate="+0%">Je suis ravi de vous voir.</prosody>',
        '</speak>',
    ]
    ssml_text = "".join(ssml_parts)
    output_file = "test_ssml_output.mp3"
    print("SSML to be synthesized:\n", ssml_text)

    # save() is called without any SSML-specific parameter.
    tts = edge_tts.Communicate(text=ssml_text, voice="fr-FR-DeniseNeural")
    await tts.save(output_file)
    print(f"SSML synthesis complete. Check the output file: {output_file}")

# Entry point of this cell: run the SSML experiment to completion.
if __name__ == "__main__":
    asyncio.run(test_ssml())


SSML to be synthesized:
 <?xml version="1.0" encoding="UTF-8"?><speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="fr-FR"><prosody rate="120%">Bonjour, comment allez-vous?</prosody><break time="300ms"/><prosody rate="+0%">Je suis ravi de vous voir.</prosody></speak>
SSML synthesis complete. Check the output file: test_ssml_output.mp3


WITH WORDS PAUSE

4.2.3_La création de rapports

In [1]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
import random

# Patch asyncio via nest_asyncio (presumably so asyncio.run() also works when
# the notebook already has an event loop running).
nest_asyncio.apply()

# ----- Configuration -----
# ffmpeg must be on PATH; abort early with a clear message when it is not.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video and a per-run output directory named after it plus a timestamp.
input_video = "to translate/4.2.3_La création de rapports.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Whisper model size handed to WhisperModel in transcribe().
model_size = "small"
# Not referenced by the functions visible in this cell.
update_existing = True

# For this version we rely on cloud-based Edge TTS.
# (Not referenced by the functions visible in this cell.)
USE_EDGE_TTS = True

# Files and paths
# Every artifact of the run is written inside output_dir.
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name; used as the prefix of the generated file names.
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """Extract the audio of ``input_video`` into ``extracted_audio``.

    ffmpeg is run with ``ac=1`` and ``ar=16000`` and overwrites an existing
    output file. When ffmpeg fails, its captured stdout and stderr are printed
    and the error is re-raised.

    Returns:
        The path stored in ``extracted_audio``.
    """
    job = ffmpeg.input(input_video)
    job = job.output(extracted_audio, ac=1, ar=16000)
    job = job.overwrite_output()
    try:
        job.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        print("STDOUT:", err.stdout.decode("utf8"))
        print("STDERR:", err.stderr.decode("utf8"))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Transcribe ``audio_path`` with a Whisper model on the CPU.

    Returns:
        A ``(language, segments)`` tuple, where ``segments`` is a list of
        dicts with the keys ``"start"``, ``"end"`` and ``"text"`` (stripped).
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    detected = info.language
    print(f"Detected language: {detected}")
    transcript_segments = [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]
    return detected, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time in seconds to a ``pysrt.SubRipTime``.

    The value is rounded to the nearest millisecond first and then split with
    integer arithmetic. Truncating the fractional part of a float instead can
    lose a millisecond (2.3 s would become 2 s 299 ms).
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=whole_seconds, milliseconds=milliseconds)

def generate_subtitle_file(segments, output_path):
    """Write ``segments`` as an SRT file at ``output_path`` (UTF-8).

    Each segment is a dict with ``"start"``, ``"end"`` and ``"text"``; items
    are numbered from 1 in input order.

    Returns:
        ``output_path``.
    """
    srt_file = pysrt.SubRipFile()
    for number, seg in enumerate(segments, start=1):
        begin = time_to_subrip(seg["start"])
        finish = time_to_subrip(seg["end"])
        srt_file.append(
            pysrt.SubRipItem(index=number, start=begin, end=finish, text=seg["text"])
        )
    srt_file.save(output_path, encoding="utf-8")
    return output_path

# ============== Translation & Review Functions ==============
def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Group subtitles into sentences, translate them and write a review file.

    Consecutive subtitles are collected into one group until a subtitle's text
    ends with ``.``, ``!`` or ``?``; leftover subtitles form a final group.
    Each group is translated and written to ``review_file_path`` together with
    default voice-speed and silence values. The function then blocks until the
    user types ``Y``.

    Returns:
        The list of subtitle groups.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r"[.!?]\s*$")
    separator = "----------------------------------------------------------------\n"

    groups = []
    pending = []
    for cue in pysrt.open(source_path):
        pending.append(cue)
        if ends_sentence.search(cue.text):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    with open(review_file_path, "w", encoding="utf-8") as review:
        review.writelines([
            "Translation Review File\n",
            "Please update the French text in the **Final Translation:** field below.\n",
            "DO NOT change the keys (**Final Translation:**, **Voice Speed:**, **Silence Duration:**).\n",
            separator,
        ])
        for number, group in enumerate(groups, start=1):
            start_s = group[0].start.ordinal / 1000
            end_s = group[-1].end.ordinal / 1000
            source_text = " ".join(cue.text for cue in group)
            machine_text = translator.translate(text=source_text)
            voice_speed = "+0%"
            silence_ms = "100"
            review.writelines([
                f"Segment {number} (start: {start_s:.2f}s, end: {end_s:.2f}s):\n",
                f"**Original:** {source_text}\n",
                f"**Auto Translated:** {machine_text}\n",
                f"**Final Translation:** {machine_text}\n",
                f"**Voice Speed:** {voice_speed}\n",
                f"**Silence Duration:** {silence_ms}\n",
                separator,
            ])

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations as needed.")
    # Keep prompting until the reviewer confirms with 'Y' (case-insensitive).
    answer = ""
    while answer != "y":
        answer = input("Type 'Y' when ready to continue: ").strip().lower()
    return groups

def parse_review_overrides(review_file_path):
    """Read the per-segment overrides back from the review file.

    The file is split on the dashed separator line. Within each block the
    ``**Final Translation:**``, ``**Voice Speed:**`` and
    ``**Silence Duration:**`` lines are read; voice speed defaults to "+0%"
    and silence to 100.0 ms (also when the number cannot be parsed). Blocks
    without a non-empty final translation are skipped.

    Returns:
        A list of dicts with the keys ``"final_translation"``,
        ``"voice_speed"`` and ``"silence_duration"``, in file order.
    """
    separator = "----------------------------------------------------------------"
    translation_key = "**Final Translation:**"
    speed_key = "**Voice Speed:**"
    silence_key = "**Silence Duration:**"

    with open(review_file_path, "r", encoding="utf-8") as handle:
        raw_blocks = handle.read().split(separator)

    segments_overrides = []
    for raw in raw_blocks:
        block = raw.strip()
        if not block:
            continue
        entry = {
            "final_translation": None,
            "voice_speed": "+0%",
            "silence_duration": 100.0,  # default ms
        }
        for line in block.splitlines():
            if line.startswith(translation_key):
                entry["final_translation"] = line[len(translation_key):].strip()
            elif line.startswith(speed_key):
                entry["voice_speed"] = line[len(speed_key):].strip()
            elif line.startswith(silence_key):
                try:
                    entry["silence_duration"] = float(line[len(silence_key):].strip())
                except ValueError:
                    entry["silence_duration"] = 100.0
        if entry["final_translation"]:
            segments_overrides.append(entry)

    print("Parsed review file overrides:")
    for number, entry in enumerate(segments_overrides, start=1):
        print(f"  Segment {number} final translation: {entry['final_translation']}")
    return segments_overrides

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` towards ``target_duration`` (seconds).

    When the clip is more than 0.1 s too short, trailing silence is appended;
    when it is more than 0.1 s too long, the excess is cut from its end.
    Within that tolerance the clip is returned unchanged.
    """
    shortfall = target_duration - audio.duration_seconds
    if shortfall > 0.1:
        return audio + AudioSegment.silent(duration=shortfall * 1000)
    if shortfall < -0.1:
        excess_ms = int(abs(shortfall) * 1000)
        return audio[:-excess_ms]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """Split ``text`` into sentences.

    A split happens at whitespace that follows ``.``, ``!`` or ``?`` and
    precedes an uppercase letter. Besides A-Z, the accented capitals that
    start French sentences (À, É, Ô, Œ, ...) count as uppercase; an
    ASCII-only check would leave "Il est parti. À demain." as one phrase.

    Returns:
        The stripped, non-empty phrases in order.
    """
    # A-Z plus U+00C0-U+00D6, U+00D8-U+00DE (Latin-1 capitals), Œ and Ÿ.
    boundary = r"(?<=[.!?])\s+(?=[A-Z\u00C0-\u00D6\u00D8-\u00DE\u0152\u0178])"
    phrases = re.split(boundary, text)
    return [phrase.strip() for phrase in phrases if phrase.strip()]

def calculate_phrase_weights(original_text, translated_phrases):
    """Return each phrase's share of the total word count.

    ``original_text`` is accepted for interface compatibility but not used.
    An empty phrase list yields an empty list instead of dividing by zero.
    When the phrases contain no words at all, the weight is split evenly.
    """
    if not translated_phrases:
        return []
    fr_phrase_word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_fr_words = sum(fr_phrase_word_counts)
    if total_fr_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)
    return [count / total_fr_words for count in fr_phrase_word_counts]

# ============== TTS Functions: Edge TTS Only with Debug Logging ==============
async def robust_synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", max_retries: int = 5):
    """
    Synthesize speech using Edge TTS with robust retry logic.

    Each attempt builds a fresh ``edge_tts.Communicate`` object and saves its
    audio to ``output_path``. After a failed attempt the coroutine waits
    ``2 ** attempt`` seconds plus up to one second of random jitter before the
    next try. Detailed debug messages are printed for each attempt.

    Note: In Edge TTS v7.0.0 the 'session' parameter is not supported, so no
    aiohttp session is created here (one would never be handed to edge_tts).

    Args:
        phrase: Text to synthesize.
        output_path: File the audio is saved to.
        voice: Edge TTS voice name.
        rate: Speaking-rate modifier string such as "+0%".
        max_retries: Maximum number of attempts.

    Raises:
        RuntimeError: If every attempt failed; chained to the last error seen.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            communicate = edge_tts.Communicate(
                text=phrase,
                voice=voice,
                rate=rate
            )
            print(f"[Debug] Attempt {attempt+1}: Synthesizing phrase: '{phrase}'")
            await communicate.save(output_path)
            print(f"[Debug] Phrase synthesized successfully to {output_path}")
            return
        except Exception as e:
            last_error = e
            print(f"[Error] Attempt {attempt+1}/{max_retries} failed for phrase: '{phrase}'. Exception: {e}")
            # Only back off when another attempt will follow; sleeping after
            # the last failure would just delay the error.
            if attempt + 1 < max_retries:
                wait_time = 2 ** attempt + random.uniform(0, 1)
                print(f"[Debug] Retrying in {wait_time:.2f} seconds...")
                await asyncio.sleep(wait_time)
    raise RuntimeError(f"Failed to synthesize phrase after {max_retries} attempts: {phrase}") from last_error

async def synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Synthesize ``phrase`` into ``output_path`` via ``robust_synthesize_phrase``.

    The retry count is left at that function's default.
    """
    await robust_synthesize_phrase(
        phrase=phrase,
        output_path=output_path,
        voice=voice,
        rate=rate,
    )

# Kept for backward compatibility: older call sites use this name.
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Alias of ``synthesize_phrase``; forwards every argument unchanged."""
    await synthesize_phrase(
        phrase=phrase,
        output_path=output_path,
        voice=voice,
        rate=rate,
    )

def change_playback_speed(sound, speed=1.0):
    """Return ``sound`` re-timed by ``speed`` at its original frame rate.

    The raw samples are re-labelled with a frame rate of
    ``int(sound.frame_rate * speed)`` and the result is then converted back to
    the original frame rate with ``set_frame_rate``.
    """
    original_rate = sound.frame_rate
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )
    return retimed.set_frame_rate(original_rate)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path):
    """
    Build the translated audio track, one subtitle group at a time.

    The review file is generated and parsed first. For every group the final
    translation is split into phrases, each phrase is synthesized and fitted to
    its share of the group's duration, and the group is appended to the
    combined track at the group's start time. A debug log is written to
    ``debug_log_path`` and the track is exported as WAV to ``output_audio_path``.

    Groups without a matching override fall back to the original text, a
    "+0%" voice speed and 100 ms of silence after each phrase.

    Returns:
        ``output_audio_path``.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)
    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []
    offset_threshold = 0.05  # seconds of mismatch tolerated before re-timing
    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start
        original_text = " ".join(sub.text for sub in group)
        if idx < len(overrides):
            override = overrides[idx]
            final_translation = override["final_translation"]
            voice_speed_override = override["voice_speed"]
            silence_duration_override = override["silence_duration"]
        else:
            final_translation = original_text
            voice_speed_override = "+0%"
            silence_duration_override = 100.0

        print(f"[Debug] Segment {idx+1} final translation: {final_translation}")
        french_phrases = split_french_phrases(final_translation)
        # No phrases means nothing to weigh; skip the helper so an empty
        # translation cannot trigger a division by zero.
        weights = calculate_phrase_weights(original_text, french_phrases) if french_phrases else []
        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")
            try:
                try:
                    await synthesize_phrase_edge_hybrid(phrase, temp_path, voice="fr-FR-DeniseNeural", rate=voice_speed_override)
                except Exception as e:
                    print(f"[Warning] Synthesis failed for phrase '{phrase}': {e}. Skipping this phrase.")
                    continue
                try:
                    audio = AudioSegment.from_mp3(temp_path)
                except Exception as e:
                    print(f"[Warning] Unable to load audio from {temp_path}: {e}. Skipping this phrase.")
                    continue
                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44 and audio:
                    try:
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        print(f"[Warning] Ignoring corrupted audio file: {temp_path}. Error: {e}")
                else:
                    print(f"[Warning] Missing or invalid file: {temp_path}")
            finally:
                # Always drop the temp file, including after a failed synthesis.
                if os.path.exists(temp_path):
                    os.remove(temp_path)
        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=silence_duration_override)
        group_audio = group_audio[:int(target_duration * 1000)]
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration
        # generated_duration is 0 when every phrase failed; there is nothing to
        # re-time then and dividing by it would crash the whole run.
        if generated_duration > 0 and abs(time_diff) > offset_threshold:
            # NOTE(review): group_audio was just truncated to target_duration,
            # so speed_factor is >= 1 here. Confirm that a factor above 1 in
            # change_playback_speed moves the clip toward target_duration
            # rather than shortening it further.
            speed_factor = target_duration / generated_duration
            print(f"[Debug] Segment {group[0].index} : adjusting speed, factor={speed_factor:.3f}")
            group_audio = change_playback_speed(group_audio, speed_factor)
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence
        combined_audio += group_audio
        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed:** {voice_speed_override}\n"
            f"**Silence Duration:** {silence_duration_override} ms\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration before adjustment:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)
    with open(debug_log_path, "w", encoding="utf-8") as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)
    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")
    return output_audio_path

# ============== Merge Audio and Video Function ==============
def merge_audio_video():
    """
    Replace the audio of ``input_video`` with ``translated_audio``.

    When the translated track is shorter than the video, a copy padded with
    trailing silence is written to ``temp_full_audio.wav`` in ``output_dir``
    and used instead. The result is encoded to ``output_video`` (H.264 video,
    AAC audio). Both clips are closed afterwards, also when encoding fails, so
    their readers are not left open.
    """
    video = VideoFileClip(input_video)
    try:
        audio = AudioFileClip(translated_audio)
        try:
            if audio.duration < video.duration:
                extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
                audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
                audio_seg = AudioSegment.from_file(translated_audio, format="wav")
                full_audio = audio_seg + extra_silence
                full_audio.export(audio_path_temp, format="wav")
                # Release the short clip before swapping in the padded one.
                audio.close()
                audio = AudioFileClip(audio_path_temp)
            dubbed = video.set_audio(audio)
            dubbed.write_videofile(
                output_video,
                codec="libx264",
                audio_codec="aac",
                temp_audiofile="temp-audio.m4a",
                remove_temp=True,
                threads=4
            )
        finally:
            audio.close()
    finally:
        video.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """Run the pipeline: extract, transcribe, subtitle, synthesize, merge."""
    print("Extracting audio...")
    wav_path = extract_audio()

    print("Transcribing audio...")
    transcription = transcribe(wav_path)
    _detected_language, transcript = transcription

    print("Generating English subtitles...")
    generate_subtitle_file(transcript, subtitle_file_en)

    print("Generating French audio with synchronization and manual overrides...")
    await async_generate_translated_audio_with_sync_using_review(
        subtitle_file_en,
        translated_audio,
        debug_log_file,
        review_file,
    )

    print("Merging audio and video...")
    merge_audio_video()

    print(f"Process completed! Output video: {output_video}")

# Entry point of this cell: run the whole pipeline to completion.
if __name__ == "__main__":
    asyncio.run(async_main())


✅ ffmpeg found at: C:\ffmpeg\bin\ffmpeg.EXE
Extracting audio...
Transcribing audio...
Detected language: en
Generating English subtitles...
Generating French audio with synchronization and manual overrides...
Review file created at: 4.2.3_La création de rapports_run_20250415_181829\translation_review.txt
Please review and update the final translations as needed.
Parsed review file overrides:
  Segment 1 final translation: Dans cette démo, nous explorerons comment créer un rapport de dépenses par entité couvrant différents scénarios pendant plusieurs années.
  Segment 2 final translation: Comment créer un résumé des dépenses pour toutes les unités commerciales d'une organisation avec différentes mesures contre les scénarios et l'exercice.
  Segment 3 final translation: Comment créer un rapport avec des sections de revenus et de dépenses regroupées par des entités.
  Segment 4 final translation: Comment modifier un rapport et ajouter une nouvelle dimension de présentation de données.
  S

                                                                        

MoviePy - Done.
Moviepy - Writing video 4.2.3_La création de rapports_run_20250415_181829\4.2.3_La création de rapports-french.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready 4.2.3_La création de rapports_run_20250415_181829\4.2.3_La création de rapports-french.mp4
Process completed! Output video: 4.2.3_La création de rapports_run_20250415_181829\4.2.3_La création de rapports-french.mp4


In [None]:
# Print the installed edge_tts version.
import edge_tts
print(edge_tts.__version__)


In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
from aiohttp import ClientConnectorError
import random

# Patch asyncio via nest_asyncio (presumably so asyncio.run() also works when
# the notebook already has an event loop running).
nest_asyncio.apply()

# --- Configuration ---
# ffmpeg must be on PATH; abort early with a clear message when it is not.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video and a per-run output directory named after it plus a timestamp.
input_video = "to translate/4.2.3_La création de rapports.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Whisper model size handed to WhisperModel in transcribe().
model_size = "small"
# NOTE(review): no use of this flag is visible in this part of the cell.
update_existing = True

# Files and paths
# Every artifact of the run is written inside output_dir.
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name; used as the prefix of the generated file names.
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """Extract the audio of ``input_video`` into ``extracted_audio``.

    ffmpeg is run with ``ac=1`` and ``ar=16000`` and overwrites an existing
    output file. When ffmpeg fails, its captured stdout and stderr are printed
    and the error is re-raised.

    Returns:
        The path stored in ``extracted_audio``.
    """
    job = ffmpeg.input(input_video)
    job = job.output(extracted_audio, ac=1, ar=16000)
    job = job.overwrite_output()
    try:
        job.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        print("STDOUT:", err.stdout.decode("utf8"))
        print("STDERR:", err.stderr.decode("utf8"))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Transcribe ``audio_path`` with a Whisper model on the CPU.

    Returns:
        A ``(language, segments)`` tuple, where ``segments`` is a list of
        dicts with the keys ``"start"``, ``"end"`` and ``"text"`` (stripped).
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    detected = info.language
    print(f"Detected language: {detected}")

    transcript_segments = [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]
    return detected, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time offset in seconds into a ``pysrt.SubRipTime``.

    The offset is rounded to the nearest millisecond first and then split with
    integer arithmetic. Truncating the fractional part instead loses a
    millisecond whenever the float sits just below the intended value
    (for example ``2.3`` would become 2 s 299 ms).

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The corresponding ``pysrt.SubRipTime``.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return pysrt.SubRipTime(
        hours=hours,
        minutes=minutes,
        seconds=whole_seconds,
        milliseconds=milliseconds,
    )

def generate_subtitle_file(segments, output_path):
    """Save transcript segments as a UTF-8 SubRip file.

    Args:
        segments: Iterable of dicts with ``"start"``, ``"end"`` and ``"text"``.
        output_path: Destination of the ``.srt`` file.

    Returns:
        ``output_path``.
    """
    subs = pysrt.SubRipFile()
    # Subtitle indices are 1-based.
    for number, entry in enumerate(segments, start=1):
        begins = time_to_subrip(entry["start"])
        ends = time_to_subrip(entry["end"])
        subs.append(
            pysrt.SubRipItem(index=number, start=begins, end=ends, text=entry["text"])
        )
    subs.save(output_path, encoding='utf-8')
    return output_path

# ============== Translation & Review Functions ==============
def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Write the editable review file and wait until the user confirms it.

    Subtitles from ``source_path`` are gathered into groups that end with the
    first subtitle whose text finishes with ``.``, ``!`` or ``?``. Every group
    is translated and written as one "Segment" entry with default voice speed
    and silence duration. The function then blocks on ``input()`` until the
    user types ``Y``.

    Returns:
        The list of subtitle groups (each a list of subtitle items).
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r'[.!?]\s*$')

    groups = []
    pending = []
    for entry in pysrt.open(source_path):
        pending.append(entry)
        if ends_sentence.search(entry.text):
            groups.append(pending)
            pending = []
    if pending:
        # Trailing subtitles without closing punctuation form the last group.
        groups.append(pending)

    header_lines = [
        "Translation Review File\n",
        "You can update the following properties for each segment:\n",
        "  **Final Translation:** Your updated French text\n",
        "  **Voice Speed:** Rate modifier such as '+0%', '+10%', '-5%', etc. (default '+0%')\n",
        "  **Silence Duration:** Silence (in ms) to append (default 100 ms)\n",
        "---\n\n",
    ]
    default_voice_speed = "+0%"
    default_silence = "100"

    with open(review_file_path, 'w', encoding='utf-8') as f:
        f.writelines(header_lines)
        for number, group in enumerate(groups, 1):
            group_start = group[0].start.ordinal / 1000
            group_end = group[-1].end.ordinal / 1000
            original_text = " ".join(entry.text for entry in group)
            auto_translated = translator.translate(text=original_text)
            f.writelines([
                f"Segment {number} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n",
                f"**Original:** {original_text}\n",
                f"**Auto Translated:** {auto_translated}\n",
                f"**Final Translation:** {auto_translated}\n",
                f"**Voice Speed:** {default_voice_speed}\n",
                f"**Silence Duration:** {default_silence}\n",
                "---\n\n",
            ])

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations and the additional properties as needed.")
    # Keep asking until the answer is exactly "y" (case-insensitive, surrounding spaces ignored).
    while input("Type 'Y' when ready to continue using the updated review file: ").strip().lower() != "y":
        pass
    return groups

def parse_review_overrides(review_file_path):
    """Read the per-segment overrides back from the review file.

    The file is cut into blocks at separator lines, i.e. lines that contain
    nothing but three or more dashes. A ``---`` that appears inside a
    translation therefore no longer splits the segment. A block contributes an
    entry only when one of its lines starts (at column 0) with
    ``**Final Translation:**``; this skips the instruction header, whose
    example lines are indented.

    Args:
        review_file_path: Path of the UTF-8 review file.

    Returns:
        A list, in file order, of dicts with the keys ``"final_translation"``
        (str), ``"voice_speed"`` (str, ``"+0%"`` when missing or empty; an
        unsigned value such as ``"10%"`` becomes ``"+10%"`` to match the signed
        form shown in the file header) and ``"silence_duration"`` (float,
        milliseconds; ``100.0`` when missing, unparsable, negative or not
        finite).
    """
    default_voice_speed = "+0%"
    default_silence_ms = 100.0

    with open(review_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Only a line made solely of dashes separates segments.
    blocks = re.split(r'^[ \t]*-{3,}[ \t]*$', content, flags=re.MULTILINE)

    segments_overrides = []
    for blk in blocks:
        final_translation = None
        voice_speed = default_voice_speed
        silence_duration = default_silence_ms
        for line in blk.strip().splitlines():
            if line.startswith("**Final Translation:**"):
                final_translation = line.split("**Final Translation:**", 1)[1].strip()
            elif line.startswith("**Voice Speed:**"):
                value = line.split("**Voice Speed:**", 1)[1].strip()
                if re.fullmatch(r'\d+%', value):
                    value = "+" + value
                voice_speed = value or default_voice_speed
            elif line.startswith("**Silence Duration:**"):
                try:
                    value = float(line.split("**Silence Duration:**", 1)[1].strip())
                except ValueError:
                    value = default_silence_ms
                # float() accepts "nan" and "inf"; neither is a usable duration.
                silence_duration = value if 0 <= value < float("inf") else default_silence_ms
        if final_translation is not None:
            segments_overrides.append({
                "final_translation": final_translation,
                "voice_speed": voice_speed,
                "silence_duration": silence_duration
            })
    return segments_overrides

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` towards ``target_duration`` (seconds).

    A clip more than 0.1 s too short gets silence appended; a clip more than
    0.1 s too long loses its tail; anything closer than that is returned
    unchanged.
    """
    gap = target_duration - audio.duration_seconds

    if gap > 0.1:
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -0.1:
        excess_ms = int(abs(gap) * 1000)
        return audio[:-excess_ms]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """Split ``text`` into sentence-like phrases.

    A phrase ends at a word that finishes with ``.``, ``!`` or ``?`` when that
    word is the last one or when the following word starts with an uppercase
    letter. Words left over after the last break form a final phrase.

    Returns:
        A list of phrases (empty for empty or whitespace-only text).
    """
    tokens = text.split()
    final_index = len(tokens) - 1
    ends_sentence = re.compile(r'[.!?]$').search

    sentences = []
    pending = []
    for position, token in enumerate(tokens):
        pending.append(token)
        if not ends_sentence(token):
            continue
        if position == final_index or tokens[position + 1][0].isupper():
            sentences.append(" ".join(pending))
            pending = []

    if pending:
        sentences.append(" ".join(pending))
    return sentences

def calculate_phrase_weights(original_text, translated_phrases):
    """Return each phrase's share of the total word count.

    Args:
        original_text: Not used by the computation; kept so existing callers
            keep working.
        translated_phrases: Phrases whose relative lengths are measured.

    Returns:
        One weight per phrase, summing to 1. When no phrase contains a word
        the weights are equal. An empty phrase list yields an empty list; it
        used to raise ``ZeroDivisionError``, which happened as soon as a
        translation was left blank.
    """
    if not translated_phrases:
        return []

    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)

    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)

    return [count / total_words for count in word_counts]

# ============== Robust TTS Function with Retry and Jitter ==============
async def robust_synthesize_phrase(
    phrase: str,
    output_path: str,
    connector: aiohttp.TCPConnector,
    voice: str = "fr-FR-DeniseNeural",
    rate: str = "+0%",
    max_retries: int = 5
):
    """
    Synthesize speech for a given phrase using Edge TTS with robust retry logic.
    Uses exponential backoff with random jitter to mitigate transient connection issues.

    Args:
        phrase: Text to speak.
        output_path: File the audio is saved to.
        connector: Connector handed to ``edge_tts.Communicate``.
        voice: Edge TTS voice name.
        rate: Speaking-rate modifier such as ``"+0%"``.
        max_retries: Maximum number of attempts.

    Raises:
        ValueError, TypeError: Propagated immediately; they signal bad
            arguments, which another attempt cannot fix.
        Exception: When every attempt failed; the last error is chained as
            ``__cause__``.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            # No extra aiohttp.ClientSession is opened here: it was never used for
            # the request, and a session owns the connector it is given by default
            # (connector_owner=True), so leaving its `async with` block closed the
            # shared connector.
            communicate = edge_tts.Communicate(
                text=phrase,
                voice=voice,
                rate=rate,
                connector=connector
            )
            await communicate.save(output_path)
            return
        except (ValueError, TypeError):
            raise
        except Exception as e:
            last_error = e
            if isinstance(e, (ClientConnectorError, ConnectionResetError)):
                label = "Connection error"
            else:
                label = "Error"
            if attempt == max_retries - 1:
                # Nothing follows the last attempt, so do not wait again.
                print(f"[Error] {label} on attempt {attempt+1}/{max_retries} for phrase: '{phrase}': {e}. Giving up.")
                break
            wait_time = 2 ** attempt + random.uniform(0, 1)
            print(f"[Error] {label} on attempt {attempt+1}/{max_retries} for phrase: '{phrase}': {e}. Retrying in {wait_time:.2f} seconds.")
            await asyncio.sleep(wait_time)
    raise Exception(f"Failed to synthesize phrase after {max_retries} attempts: {phrase}") from last_error

# ============== TTS Functions Wrapper ==============
# For backward compatibility, if you want to call the function by its former name.
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, connector: aiohttp.TCPConnector, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%"):
    """Former entry point; forwards every argument to ``robust_synthesize_phrase``."""
    await robust_synthesize_phrase(
        phrase,
        output_path,
        connector,
        voice=voice,
        rate=rate,
    )

def change_playback_speed(sound, speed=1.0):
    """Return ``sound`` with its frame rate scaled by ``speed``.

    The raw samples are re-labelled with ``int(frame_rate * speed)`` and the
    result is then converted back to the original frame rate.
    """
    original_rate = sound.frame_rate
    scaled_rate = int(original_rate * speed)
    relabelled = sound._spawn(sound.raw_data, overrides={"frame_rate": scaled_rate})
    return relabelled.set_frame_rate(original_rate)

# ============== Persistent Connector Creation ==============
def create_persistent_connector():
    """Create an ``aiohttp.TCPConnector`` using the default SSL context and ``limit=10``."""
    return aiohttp.TCPConnector(ssl=ssl.create_default_context(), limit=10)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path, persistent_connector):
    """Build the translated audio track from the reviewed translations.

    The review file is generated first (this waits for the user's
    confirmation), then the per-segment overrides are read back. For every
    sentence group the final translation is split into phrases, each phrase is
    synthesized and padded/trimmed to its share of the group's time span, and
    the group is placed at its start time in the combined track. The track is
    exported as WAV and a debug log is written.

    Args:
        subtitle_source_path: SRT file with the source-language subtitles.
        output_audio_path: Destination WAV file.
        debug_log_path: Destination of the debug log.
        review_file_path: Review file to create and read back.
        persistent_connector: Connector passed on to the TTS helper.

    Returns:
        ``output_audio_path``.
    """
    grouped_subs = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []

    # Largest tolerated gap (seconds) between generated and target duration.
    offset_threshold = 0.05

    for idx, group in enumerate(grouped_subs):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start

        original_text = " ".join([sub.text for sub in group])
        if idx < len(overrides):
            final_translation = overrides[idx]["final_translation"]
            voice_speed_override = overrides[idx]["voice_speed"]
            silence_duration_override = overrides[idx]["silence_duration"]
        else:
            # Fewer review entries than groups: fall back to the source text and defaults.
            final_translation = original_text
            voice_speed_override = "+0%"
            silence_duration_override = 100.0

        french_phrases = split_french_phrases(final_translation)
        # A blank translation yields no phrases; there is nothing to weight then.
        weights = calculate_phrase_weights(original_text, french_phrases) if french_phrases else []

        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")
            try:
                await robust_synthesize_phrase(
                    phrase, temp_path, connector=persistent_connector, voice="fr-FR-DeniseNeural", rate=voice_speed_override
                )
                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44:
                    try:
                        audio = AudioSegment.from_mp3(temp_path)
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        print(f"[Warning] Ignoring corrupted audio file: {temp_path}. Error: {e}")
                else:
                    print(f"[Warning] Missing or invalid file: {temp_path}")
            finally:
                if os.path.exists(temp_path):
                    os.remove(temp_path)

        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=silence_duration_override)

        # Never let a group run past the end of its subtitles.
        group_audio = group_audio[:int(target_duration * 1000)]
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration

        # change_playback_speed() scales the frame rate by its factor, so a factor
        # above 1 shortens the clip. Bringing the clip from generated_duration to
        # target_duration therefore needs generated/target, not target/generated.
        # An empty clip cannot be stretched and is left untouched (this also avoids
        # dividing by zero).
        if abs(time_diff) > offset_threshold and generated_duration > 0 and target_duration > 0:
            speed_factor = generated_duration / target_duration
            print(f"Segment {group[0].index} : adjusting speed, factor={speed_factor:.3f}")
            group_audio = change_playback_speed(group_audio, speed_factor)

        # Fill the gap up to this group's start time with silence.
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence

        combined_audio += group_audio

        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed:** {voice_speed_override}\n"
            f"**Silence Duration:** {silence_duration_override} ms\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration before adjustment:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_path, 'w', encoding='utf-8') as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")

    return output_audio_path

# ============== Merge Audio and Video Function (unchanged) ==============
def merge_audio_video():
    """Replace the audio of ``input_video`` with ``translated_audio`` and write ``output_video``.

    When the translated audio is shorter than the video, a copy padded with
    trailing silence is exported to ``temp_full_audio.wav`` in ``output_dir``
    and used instead.
    """
    source_clip = VideoFileClip(input_video)
    dubbed_track = AudioFileClip(translated_audio)

    if dubbed_track.duration < source_clip.duration:
        missing_ms = (source_clip.duration - dubbed_track.duration) * 1000
        tail_silence = AudioSegment.silent(duration=missing_ms)
        padded_wav = os.path.join(output_dir, "temp_full_audio.wav")
        padded_track = AudioSegment.from_file(translated_audio, format="wav") + tail_silence
        padded_track.export(padded_wav, format="wav")
        dubbed_track = AudioFileClip(padded_wav)

    render_options = dict(
        codec="libx264",
        audio_codec="aac",
        temp_audiofile="temp-audio.m4a",
        remove_temp=True,
        threads=4,
    )
    source_clip = source_clip.set_audio(dubbed_track)
    source_clip.write_videofile(output_video, **render_options)

# ============== Main Asynchronous Flow ==============
async def async_main():
    """Run the pipeline: extract, transcribe, subtitle, synthesize, merge.

    The connector created at the start is closed in all cases, including when
    a step raises.
    """
    connector = create_persistent_connector()
    try:
        print("Extracting audio...")
        wav_path = extract_audio()

        print("Transcribing audio...")
        _detected_language, transcript = transcribe(wav_path)

        print("Generating English subtitles...")
        generate_subtitle_file(transcript, subtitle_file_en)

        print("Generating French audio with synchronization and manual overrides...")
        await async_generate_translated_audio_with_sync_using_review(
            subtitle_file_en,
            translated_audio,
            debug_log_file,
            review_file,
            connector,
        )

        print("Merging audio and video...")
        merge_audio_video()

        print(f"Process completed! Output video: {output_video}")
    finally:
        await connector.close()

if __name__ == "__main__":
    # Entry point: drive the asynchronous pipeline to completion.
    pipeline = async_main()
    asyncio.run(pipeline)


In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from gtts import gTTS
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio, datetime
import asyncio
import edge_tts
import tempfile
import sys  # Import the sys module
from datetime import datetime


nest_asyncio.apply()

# --- Configuration ---
# The pipeline relies on the ffmpeg executable, so stop right away when it is not on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

input_video = "4.2.4_Configuration de la solution_Avr_10_Latest.mp4"
model_size = "small"
update_existing = True

# Each run writes into its own directory: "<video stem>_run_<start time>".
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
base_name = input_video_name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

# Files and paths (all located inside output_dir)
extracted_audio = os.path.join(output_dir, input_video_name + "-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, input_video_name + "-english.srt")
subtitle_file_fr = os.path.join(output_dir, input_video_name + "-french.srt")
translated_audio = os.path.join(output_dir, input_video_name + "-french.wav")
output_video = os.path.join(output_dir, input_video_name + "-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")


# ============== Helper Functions ==============

def extract_audio():
    """Write the audio track of ``input_video`` to ``extracted_audio``.

    The output is requested with one channel (``ac=1``) at 16000 Hz
    (``ar=16000``); an existing output file is overwritten.

    Returns:
        The path held in ``extracted_audio``.

    Raises:
        ffmpeg.Error: re-raised after the captured stdout/stderr were printed.
    """
    try:
        pipeline = ffmpeg.input(input_video).output(extracted_audio, ac=1, ar=16000)
        pipeline.overwrite_output().run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as failure:
        for label, stream in (('STDOUT:', failure.stdout), ('STDERR:', failure.stderr)):
            print(label, stream.decode('utf8'))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Transcribe ``audio_path`` with a Whisper model of size ``model_size``.

    Returns:
        A ``(language, transcript_segments)`` tuple. ``language`` is the
        language reported by the model; ``transcript_segments`` is a list of
        dicts with the keys ``"start"``, ``"end"`` and ``"text"`` (stripped).
    """
    recognizer = WhisperModel(model_size, device="cpu", compute_type="int8")
    recognized, info = recognizer.transcribe(audio_path, beam_size=5)
    language = info.language
    print(f"Detected language: {language}")

    transcript_segments = [
        dict(start=item.start, end=item.end, text=item.text.strip())
        for item in recognized
    ]
    return language, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time offset in seconds into a ``pysrt.SubRipTime``.

    The offset is rounded to whole milliseconds before it is split into
    fields. The previous ``int((seconds - int(seconds)) * 1000)`` truncated
    floating-point error and could drop one millisecond (``2.3`` became
    2 s 299 ms).

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The corresponding ``pysrt.SubRipTime``.
    """
    remaining_ms = int(round(seconds * 1000))
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, milliseconds = divmod(remaining_ms, 1000)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=whole_seconds, milliseconds=milliseconds)

def generate_subtitle_file(segments, output_path):
    """Save transcript segments as a UTF-8 SubRip file.

    Args:
        segments: Iterable of dicts with ``"start"``, ``"end"`` and ``"text"``.
        output_path: Destination of the ``.srt`` file.

    Returns:
        ``output_path``.
    """
    srt_file = pysrt.SubRipFile()
    position = 0
    for piece in segments:
        position += 1  # subtitle indices are 1-based
        item = pysrt.SubRipItem(
            index=position,
            start=time_to_subrip(piece["start"]),
            end=time_to_subrip(piece["end"]),
            text=piece["text"],
        )
        srt_file.append(item)
    srt_file.save(output_path, encoding='utf-8')
    return output_path

# ============== Translation & Review Functions ==============

def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Write the editable review file and wait until the user confirms it.

    Subtitles from ``source_path`` are gathered into groups that end with the
    first subtitle whose text finishes with ``.``, ``!`` or ``?``. Every group
    is translated and written as one "Segment" entry. The function then blocks
    on ``input()`` until the user types ``Y``.

    Returns:
        ``(groups, original_texts)``: the subtitle groups and, in the same
        order, the joined source text of each group.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r'[.!?]\s*$')

    groups = []
    pending = []
    for entry in pysrt.open(source_path):
        pending.append(entry)
        if ends_sentence.search(entry.text):
            groups.append(pending)
            pending = []
    if pending:
        # Trailing subtitles without closing punctuation form the last group.
        groups.append(pending)

    original_texts = []
    with open(review_file_path, 'w', encoding='utf-8') as f:
        f.write("Translation Review File\n\n")
        for number, group in enumerate(groups, 1):
            group_start = group[0].start.ordinal / 1000
            group_end = group[-1].end.ordinal / 1000
            original_text = " ".join(entry.text for entry in group)
            original_texts.append(original_text)
            auto_translated = translator.translate(text=original_text)
            f.writelines([
                f"Segment {number} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n",
                f"**Original:** {original_text}\n",
                f"**Auto Translated:** {auto_translated}\n",
                # The final translation starts out as a copy of the automatic one.
                f"**Final Translation:** {auto_translated}\n",
                "---\n\n",
            ])

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations in the file as needed.")

    # Keep asking until the answer is exactly "y" (case-insensitive, surrounding spaces ignored).
    while input("Type 'Y' when ready to continue using the updated review file: ").strip().lower() != "y":
        pass

    return groups, original_texts

def parse_final_translations(review_file_path):
    """Collect the ``**Final Translation:**`` text of every segment, in file order.

    Segments are delimited by separator lines, i.e. lines that contain nothing
    but three or more dashes, so a ``---`` inside a translation no longer
    truncates it. Only spaces and tabs are skipped after the marker, which
    keeps an empty translation from picking up the following line.

    Args:
        review_file_path: Path of the UTF-8 review file.

    Returns:
        A list with one stripped translation per segment that has the marker.
    """
    with open(review_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    final_translations = []
    for seg in re.split(r'^[ \t]*-{3,}[ \t]*$', content, flags=re.MULTILINE):
        match = re.search(r'\*\*Final Translation:\*\*[ \t]*(.*)', seg)
        if match:
            final_translations.append(match.group(1).strip())
    return final_translations

def validate_review_with_retranslation(review_file_path, groups, original_texts, from_lang, to_lang):
    """Retranslate segments whose ``**Original:**`` text was edited by the user.

    The review file is re-read in a loop. Each segment's current original text
    is compared with ``original_texts``; for every segment that differs, both
    the ``**Auto Translated:**`` and ``**Final Translation:**`` values are
    replaced with a fresh translation, the file is rewritten and the user is
    asked to confirm again. The loop ends when no original text has changed.

    Args:
        review_file_path: Path of the UTF-8 review file.
        groups: Not used here; kept for existing callers.
        original_texts: Original text of each segment at generation time.
        from_lang: Source language code.
        to_lang: Target language code.

    Returns:
        The final translations as returned by ``parse_final_translations``.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    # Spans the old automatic translation AND the rest of the Final Translation
    # line, so the old final text is replaced instead of being left behind the
    # new one.
    stale_fields = re.compile(
        r'(\*\*Auto Translated:\*\*).*?(\n\*\*Final Translation:\*\*)[^\n]*',
        re.DOTALL,
    )

    while True:
        current_originals = []
        updated_segments = set()

        with open(review_file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        segments = content.strip().split('---')
        seg_index = 0
        for seg in segments:
            # Pieces without a "Segment" header (file title, blanks) are not counted.
            if not seg.strip() or "Segment" not in seg:
                continue

            if seg_index < len(original_texts) and "Original:" in seg:
                original_match = re.search(r'\*\*Original:\*\*\s*(.*)', seg)
                current_original = original_match.group(1).strip() if original_match else ""
                current_originals.append(current_original)

                if current_original != original_texts[seg_index]:
                    updated_segments.add(seg_index)
            else:
                current_originals.append("")

            seg_index += 1

        if not updated_segments:
            break

        print(f"⚠️  Detected changes in {len(updated_segments)} original segments. Retranslating...")
        new_content = []
        seg_index = 0

        for seg in segments:
            if not seg.strip() or "Segment" not in seg:
                new_content.append(seg)
                continue

            if seg_index in updated_segments:
                new_translation = translator.translate(text=current_originals[seg_index])

                # A callable replacement inserts the text literally; in a template
                # string, backslashes or "\1"-like sequences in the translation
                # would be interpreted by re.sub.
                seg = stale_fields.sub(
                    lambda m, text=new_translation: f"{m.group(1)} {text}{m.group(2)} {text}",
                    seg,
                )

            new_content.append(seg)
            seg_index += 1

        with open(review_file_path, 'w', encoding='utf-8') as f:
            f.write('---'.join(new_content))

        print("Updated translations written. Please review changes.")
        while True:
            user_confirmation = input("Type 'Y' when ready to continue using the updated review file: ").strip().lower()
            if user_confirmation == "y":
                break
        # The edited originals become the baseline for the next pass.
        original_texts = current_originals

    return parse_final_translations(review_file_path)


# ============== Audio Synchronization Functions ==============

def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` towards ``target_duration`` (seconds).

    A clip more than 0.1 s too short gets silence appended; a clip more than
    0.1 s too long loses its tail; anything closer than that is returned
    unchanged.
    """
    shortfall = target_duration - audio.duration_seconds

    if shortfall > 0.1:
        padding = AudioSegment.silent(duration=shortfall * 1000)
        result = audio + padding
    elif shortfall < -0.1:
        surplus_ms = abs(shortfall) * 1000
        result = audio[:-int(surplus_ms)]
    else:
        result = audio
    return result


def synthesize_phrase_edge(text, output_path, voice="fr-FR-DeniseNeural", rate="+0%"):
    """Synthesize ``text`` to ``output_path`` with the ``edge-tts`` command-line tool.

    The tool is run synchronously with an argument list and no shell, so the
    phrase needs no quoting. Options are passed in ``--name=value`` form so
    that values starting with ``-`` (a negative rate such as ``-5%``) are not
    mistaken for options.

    This replaces ``asyncio.create_subprocess_exec(..., shell=True)``, which
    raises ``ValueError("shell must be False")`` once awaited, and an
    ``os.system`` fallback that joined the arguments into an unquoted command
    string.

    Args:
        text: Phrase to speak.
        output_path: Audio file to write.
        voice: Edge TTS voice name.
        rate: Speaking-rate modifier such as ``"+0%"``.

    Returns:
        ``output_path`` on success, ``None`` when the tool is missing, cannot
        be started or exits with a non-zero status.
    """
    import shutil
    import subprocess

    executable = shutil.which("edge-tts")
    if executable is None:
        print("Error executing edge-tts: executable not found on PATH")
        return None

    command = [
        executable,
        f"--voice={voice}",
        f"--text={text}",
        f"--write-media={output_path}",
        f"--rate={rate}",
    ]

    try:
        completed = subprocess.run(command, capture_output=True, check=False)
    except OSError as e:
        print(f"Error executing edge-tts: {e}")
        return None

    if completed.returncode != 0:
        print(f"Error executing edge-tts: {completed.stderr.decode(errors='replace')}")
        return None

    print(f"Successfully synthesized phrase: {text[:50]}... to {output_path}")
    return output_path


def split_french_phrases(text):
    """Split ``text`` into sentence-like phrases.

    A phrase ends at a word that finishes with ``.``, ``!`` or ``?`` when that
    word is the last one or when the following word starts with an uppercase
    letter. Words left over after the last break form a final phrase.

    Returns:
        A list of phrases (empty for empty or whitespace-only text).
    """
    words = text.split()
    word_count = len(words)

    phrases = []
    phrase_start = 0
    # `following` is the index of the word after the current one.
    for following, word in enumerate(words, start=1):
        if not re.search(r'[.!?]$', word):
            continue
        if following == word_count or words[following][0].isupper():
            phrases.append(" ".join(words[phrase_start:following]))
            phrase_start = following

    if phrase_start < word_count:
        phrases.append(" ".join(words[phrase_start:]))
    return phrases

def calculate_phrase_weights(original_text, translated_phrases):
    """Return each phrase's share of the total word count.

    Args:
        original_text: Not used by the computation; kept so existing callers
            keep working.
        translated_phrases: Phrases whose relative lengths are measured.

    Returns:
        One weight per phrase, summing to 1. When no phrase contains a word
        the weights are equal. An empty phrase list yields an empty list
        instead of raising ``ZeroDivisionError``.
    """
    phrase_count = len(translated_phrases)
    if phrase_count == 0:
        return []

    fr_phrase_word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_fr_words = sum(fr_phrase_word_counts)

    if total_fr_words == 0:
        return [1 / phrase_count] * phrase_count

    return [count / total_fr_words for count in fr_phrase_word_counts]


def generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path):
    """Build the translated audio track from the reviewed translations.

    The review file is generated and validated first (both steps wait for the
    user's confirmation). For every sentence group the final translation is
    split into phrases, each phrase is synthesized and padded/trimmed to its
    share of the group's time span, and the group is placed at its start time
    in the combined track. The track is exported as WAV and a debug log is
    written.

    Args:
        subtitle_source_path: SRT file with the source-language subtitles.
        output_audio_path: Destination WAV file.
        debug_log_path: Destination of the debug log.
        review_file_path: Review file to create and read back.

    Returns:
        ``output_audio_path``.
    """
    grouped_subs, original_texts = generate_translation_review_file(subtitle_source_path, review_file_path)
    final_translations = validate_review_with_retranslation(review_file_path, grouped_subs, original_texts, "en", "fr")

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []

    # Largest tolerated gap (seconds) between generated and target duration.
    offset_threshold = 0.05

    for idx, group in enumerate(grouped_subs):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start

        original_text = " ".join([sub.text for sub in group])
        # The review file may hold fewer translations than there are groups
        # (e.g. a segment was deleted); fall back to the source text then.
        final_translation = final_translations[idx] if idx < len(final_translations) else original_text

        french_phrases = split_french_phrases(final_translation)
        # A blank translation yields no phrases; there is nothing to weight then.
        weights = calculate_phrase_weights(original_text, french_phrases) if french_phrases else []

        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")

            try:
                audio_file = synthesize_phrase_edge(
                    phrase, temp_path, voice="fr-FR-DeniseNeural", rate="+0%"
                )

                if audio_file and os.path.exists(temp_path) and os.path.getsize(temp_path) > 44:
                    try:
                        audio = AudioSegment.from_mp3(temp_path)
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        print(f"[Warning] Audio corrompu ignoré : {temp_path}. Erreur: {e}")
                else:
                    print(f"[Warning] Fichier manquant ou invalide: {temp_path}")
            finally:
                # Remove the temporary file even when synthesis raised.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=100)

        # Never let a group run past the end of its subtitles.
        group_audio = group_audio[:int(target_duration * 1000)]

        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration

        # change_playback_speed() (defined elsewhere in this notebook) scales the
        # frame rate by its factor, so a factor above 1 shortens the clip. Bringing
        # the clip from generated_duration to target_duration therefore needs
        # generated/target, not target/generated. An empty clip cannot be stretched
        # and is left untouched (this also avoids dividing by zero).
        if abs(time_diff) > offset_threshold and generated_duration > 0 and target_duration > 0:
            speed_factor = generated_duration / target_duration
            print(f"Segment {group[0].index} : ajustement de vitesse appliqué, facteur={speed_factor:.3f}")
            group_audio = change_playback_speed(group_audio, speed_factor)

        # Fill the gap up to this group's start time with silence.
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence

        combined_audio += group_audio

        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration avant ajustement:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_path, 'w', encoding='utf-8') as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")

    return output_audio_path


# ============== Merge Audio and Video Function ==============

def merge_audio_video():
    """Replace the audio of ``input_video`` with ``translated_audio`` and write ``output_video``.

    When the translated audio is shorter than the video, a copy padded with
    trailing silence is exported to ``temp_full_audio.wav`` in ``output_dir``
    and used instead. The padding is done with pydub on the WAV file because a
    moviepy ``AudioFileClip`` cannot be added to a pydub ``AudioSegment``; the
    previous ``audio + extra_silence`` failed with ``TypeError`` whenever
    padding was needed.

    Returns:
        ``output_video``.
    """
    video = VideoFileClip(input_video)
    audio = AudioFileClip(translated_audio)

    if audio.duration < video.duration:
        extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
        padded_path = os.path.join(output_dir, "temp_full_audio.wav")
        padded_audio = AudioSegment.from_file(translated_audio, format="wav") + extra_silence
        padded_audio.export(padded_path, format="wav")
        audio = AudioFileClip(padded_path)

    new_video = video.set_audio(audio)
    new_video.write_videofile(output_video, codec="libx264", audio_codec="aac", temp_audiofile='temp-audio.m4a', remove_temp=True)
    return output_video


# ============== Main Flow ==============

if __name__ == "__main__":
    # Check if running on Windows
    if os.name == 'nt':
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    # 1. Extract Audio
    print("Extracting audio...")
    extracted_audio = extract_audio()

    # 2. Transcribe Audio
    print("Transcribing audio...")
    language, transcript_segments = transcribe(extracted_audio)

    # 3. Generate English Subtitle File
    print("Generating English subtitle file...")
    subtitle_file_en = generate_subtitle_file(transcript_segments, subtitle_file_en)

    # 4. Review, validate and generate the translated audio.
    # generate_translated_audio_with_sync_using_review() creates the review file
    # and validates it itself. Running those two steps here as well made the user
    # review a file that was then regenerated from scratch, discarding the edits
    # and asking for confirmation a second time.
    print("Generating translated audio...")
    translated_audio = generate_translated_audio_with_sync_using_review(
        subtitle_file_en, translated_audio, debug_log_file, review_file
    )

    # 5. Merge Audio and Video
    print("Merging audio and video...")
    output_video = merge_audio_video()

    print(f"✅ Final video saved to: {output_video}")


4.2.2_Flux de navigation_Avr_08_Latest

In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from gtts import gTTS
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime   

# nest_asyncio patches asyncio to tolerate nested event loops; presumably needed
# because this code runs in a notebook cell where a loop is already running.
nest_asyncio.apply()

# --- Configuration ---
# Fail fast when the ffmpeg executable is not on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

input_video = "4.2.3_La création de rapports.mp4"  # Path to your input video
base_name = os.path.splitext(os.path.basename(input_video))[0]  # e.g. "4.2.3_La création de rapports"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")            # e.g. "20250414_173015"
# Every run writes into its own timestamped directory next to the notebook.
output_dir = f"{base_name}_run_{timestamp}"
model_size = "small"        # Whisper model size (tiny, base, small, medium, large)
# NOTE(review): update_existing is not read anywhere in the visible part of this cell.
update_existing = True      # Set to True to allow interactive review/edit of translations

# Files and paths
# All artefacts are named after the input video's base name and live in output_dir.
os.makedirs(output_dir, exist_ok=True)
input_video_name = os.path.splitext(os.path.basename(input_video))[0]  # same value as base_name
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
subtitle_file_fr = os.path.join(output_dir, f"{input_video_name}-french.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")


# ============== Helper Functions ==============

def extract_audio():
    """Pull the audio track out of ``input_video`` with ffmpeg.

    The audio is written to the module-level ``extracted_audio`` path as a
    mono, 16 kHz file, overwriting any existing file.

    Returns:
        The ``extracted_audio`` path.

    Raises:
        ffmpeg.Error: re-raised after ffmpeg's captured stdout and stderr
            have been printed.
    """
    stream = ffmpeg.input(input_video)
    stream = stream.output(extracted_audio, ac=1, ar=16000)  # mono and 16kHz
    stream = stream.overwrite_output()
    try:
        stream.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        # Surface ffmpeg's own output before propagating the failure.
        print('STDOUT:', err.stdout.decode('utf8'))
        print('STDERR:', err.stderr.decode('utf8'))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Run faster-whisper on ``audio_path`` and collect the segments.

    The model size comes from the module-level ``model_size``; the model is
    created for CPU with int8 compute.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A ``(language, transcript_segments)`` tuple, where ``language`` is the
        language reported by the model and ``transcript_segments`` is a list
        of dicts with ``"start"``, ``"end"`` and stripped ``"text"`` entries.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    language = info.language
    print(f"Detected language: {language}")

    transcript_segments = [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]
    return language, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time offset in seconds to a ``pysrt.SubRipTime``.

    The offset is rounded once to whole milliseconds and then split into
    hours, minutes, seconds and milliseconds with integer arithmetic.
    Truncating the fractional part separately loses a millisecond to float
    error (for example 1.001 s would become 00:00:01,000).

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The equivalent ``pysrt.SubRipTime``.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3600000)
    minutes, remainder_ms = divmod(remainder_ms, 60000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return pysrt.SubRipTime(
        hours=hours,
        minutes=minutes,
        seconds=whole_seconds,
        milliseconds=milliseconds,
    )

def generate_subtitle_file(segments, output_path):
    """Write transcript segments to an SRT file.

    Args:
        segments: Iterable of dicts with ``"start"``, ``"end"`` (seconds) and
            ``"text"`` entries; items are numbered from 1 in iteration order.
        output_path: Destination path of the SRT file (saved as UTF-8).

    Returns:
        ``output_path``.
    """
    srt_file = pysrt.SubRipFile()
    for number, seg in enumerate(segments, start=1):
        srt_file.append(
            pysrt.SubRipItem(
                index=number,
                start=time_to_subrip(seg["start"]),
                end=time_to_subrip(seg["end"]),
                text=seg["text"],
            )
        )
    srt_file.save(output_path, encoding='utf-8')
    return output_path

# ============== Translation & Review Functions ==============

def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Write a human-editable translation review file and wait for confirmation.

    Subtitles read from ``source_path`` are grouped into sentences: a group is
    closed whenever a subtitle's text ends with '.', '!' or '?' (optionally
    followed by whitespace). Subtitles left over at the end form a last group.
    Each group is machine-translated and written as one "Segment" block with
    ``**Original:**``, ``**Auto Translated:**`` and ``**Final Translation:**``
    lines, followed by a ``---`` separator line.

    Whitespace (including line breaks) inside the original and translated text
    is collapsed to single spaces, so every field stays on one line. The
    parser in this file reads each field only up to the end of its line, so a
    multi-line subtitle would otherwise be truncated.

    After writing the file the function blocks on ``input()`` until the user
    types 'Y' (case-insensitive).

    Args:
        source_path: Path of the source SRT file.
        review_file_path: Path of the review file to (over)write, UTF-8.
        from_lang: Source language code passed to the translator.
        to_lang: Target language code passed to the translator.

    Returns:
        The list of subtitle groups (each a list of subtitle items) in the
        order they were written.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    subs = pysrt.open(source_path)

    # Group subtitles by sentence using a simple punctuation detection.
    sentence_end_pattern = re.compile(r'[.!?]\s*$')
    groups = []
    current_group = []
    for sub in subs:
        current_group.append(sub)
        if sentence_end_pattern.search(sub.text):
            groups.append(current_group)
            current_group = []
    if current_group:
        groups.append(current_group)

    # Write the review file using the grouping information.
    with open(review_file_path, 'w', encoding='utf-8') as f:
        f.write("Translation Review File\n\n")
        for i, group in enumerate(groups, 1):
            group_start = group[0].start.ordinal / 1000
            group_end = group[-1].end.ordinal / 1000
            # Join the group's texts and collapse any embedded line breaks.
            original_text = " ".join(" ".join(sub.text for sub in group).split())
            # Auto-translate, then keep the result on a single line as well.
            auto_translated = " ".join(str(translator.translate(text=original_text)).split())
            # The auto translation is the default final translation.
            final_translation = auto_translated
            f.write(f"Segment {i} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n")
            f.write(f"**Original:** {original_text}\n")
            f.write(f"**Auto Translated:** {auto_translated}\n")
            f.write(f"**Final Translation:** {final_translation}\n")
            f.write("---\n\n")
    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations in the file as needed.")
    # Wait for user confirmation.
    while True:
        user_confirmation = input("Type 'Y' when ready to continue using the updated review file: ").strip().lower()
        if user_confirmation == "y":
            break
    return groups

def parse_final_translations(review_file_path):
    """Extract the final translation of every segment from the review file.

    The file is split into segment blocks at separator lines, i.e. lines that
    consist only of three or more dashes. Splitting on separator *lines*
    (rather than on any "---" substring) keeps translations that themselves
    contain "---" intact. Within each block the text after the first
    ``**Final Translation:**`` marker, up to the end of that line, is taken.

    Args:
        review_file_path: Path of the UTF-8 review file.

    Returns:
        A list of stripped final translations, in file order. Blocks without
        a ``**Final Translation:**`` marker are skipped.
    """
    with open(review_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    final_translations = []
    for block in re.split(r'^-{3,}[ \t]*$', content, flags=re.MULTILINE):
        # [ \t]* (not \s*) so an empty field cannot swallow the following line.
        match = re.search(r'\*\*Final Translation:\*\*[ \t]*(.*)', block)
        if match:
            final_translations.append(match.group(1).strip())
    return final_translations

# ============== Audio Synchronization Functions ==============

def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` so its length approaches ``target_duration``.

    Differences of at most 0.1 s in either direction are left untouched.

    Args:
        audio: Audio segment to adjust.
        target_duration: Desired duration in seconds.

    Returns:
        ``audio`` with trailing silence appended when it is more than 0.1 s
        too short, ``audio`` with its end cut off when it is more than 0.1 s
        too long, otherwise ``audio`` unchanged.
    """
    gap = target_duration - audio.duration_seconds

    if gap > 0.1:
        # Too short: fill the gap with silence (duration is in milliseconds).
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -0.1:
        # Too long: drop the excess from the end.
        excess_ms = int(abs(gap) * 1000)
        return audio[:-excess_ms]
    return audio


# ============== NEW: French Phrase Alignment Functions ==============


# Title abbreviations whose trailing period does not end a sentence.
_FRENCH_TITLE_ABBREVIATIONS = frozenset({"M.", "Mr.", "Mme.", "Ms.", "Mrs.", "Mmes."})


def split_french_phrases(text):
    """Split French text into sentence-like phrases.

    This single definition replaces two same-named definitions. The earlier
    one was shadowed by the later one and used a variable-width look-behind,
    which Python's ``re`` module rejects. Its intent, not splitting after
    title abbreviations such as "M.", is kept here with a plain lookup.

    The text is split on whitespace. A phrase ends after a word whose last
    character is '.', '!' or '?', provided that the word is not a title
    abbreviation and that it is either the last word or is followed by a word
    starting with an uppercase letter.

    Args:
        text: The text to split.

    Returns:
        A list of phrases, each with its words joined by single spaces.
        Empty or whitespace-only text yields an empty list.
    """
    words = text.split()
    last_index = len(words) - 1
    phrases = []
    current = []

    for i, word in enumerate(words):
        current.append(word)
        # str.split() never yields empty words, so word[-1] is always valid.
        if word[-1] not in ".!?" or word in _FRENCH_TITLE_ABBREVIATIONS:
            continue
        if i == last_index or words[i + 1][0].isupper():
            phrases.append(" ".join(current))
            current = []

    # Flush a trailing phrase that did not end with sentence punctuation.
    if current:
        phrases.append(" ".join(current))
    return phrases



def calculate_phrase_weights(original_text, translated_phrases):
    """Return the share of a segment's duration to give to each phrase.

    Weights are proportional to each translated phrase's word count.

    Args:
        original_text: Source-language text of the segment. It is accepted
            for interface compatibility and not used in the calculation.
        translated_phrases: List of translated phrases.

    Returns:
        A list of floats, one per phrase. It is empty when there are no
        phrases (previously this raised ZeroDivisionError), and holds equal
        weights when every phrase has zero words.
    """
    if not translated_phrases:
        return []

    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)

    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)

    # Each weight is the phrase's fraction of the total word count.
    return [count / total_words for count in word_counts]


# ============== MODIFIED Audio Generat
    
  
import tempfile

# ============== MODIFIED Audio Generation Function ==============


import os
import asyncio
import tempfile
import pysrt
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError




def generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path):
    """Build the translated audio track from the reviewed translations.

    Steps:
      1. Create the review file (this blocks until the user confirms it) and
         read back the final translation of every subtitle group.
      2. For each group, split the translation into phrases, synthesize each
         phrase to a temporary MP3, and pad/trim it to its weighted share of
         the group's duration.
      3. Concatenate the phrases (100 ms of silence after each), truncate to
         the group's duration, and re-time the result when it still differs
         from the target by more than ``offset_threshold``.
      4. Place each group at its subtitle start time in the combined track,
         write a debug log, and export the track as WAV.

    Args:
        subtitle_source_path: Path of the source SRT file.
        output_audio_path: Path of the WAV file to write.
        debug_log_path: Path of the UTF-8 debug log to write.
        review_file_path: Path of the review file to create and read.

    Returns:
        ``output_audio_path``.

    Raises:
        ValueError: if the review file yields fewer final translations than
            there are subtitle groups. This is checked before any speech is
            synthesized.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    final_translations = parse_final_translations(review_file_path)

    # Fail fast: every group needs a translation, and synthesis is slow.
    if len(final_translations) < len(groups):
        raise ValueError(
            f"Review file {review_file_path} contains {len(final_translations)} final "
            f"translation(s) but {len(groups)} segment(s) are expected."
        )

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []

    # Threshold (in seconds) above which a timing gap is corrected.
    offset_threshold = 0.05  # e.g. 50 ms; tune from test runs

    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start

        original_text = " ".join([sub.text for sub in group])
        final_translation = final_translations[idx]

        french_phrases = split_french_phrases(final_translation)
        weights = calculate_phrase_weights(original_text, french_phrases)

        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            # Use an .mp3 extension because the file is loaded with from_mp3 below.
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")

            try:
                asyncio.run(synthesize_phrase_edge(
                    phrase, temp_path, voice="fr-FR-DeniseNeural", rate="+0%"
                ))

                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44:
                    try:
                        audio = AudioSegment.from_mp3(temp_path)
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        # Best effort: an undecodable phrase is skipped, not fatal.
                        print(f"[Warning] Audio corrompu ignoré : {temp_path}. Erreur: {e}")
                else:
                    print(f"[Warning] Fichier manquant ou invalide: {temp_path}")
            finally:
                # Always remove the temporary phrase file.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=100)

        # Truncate when the assembled audio runs past the group's duration.
        group_audio = group_audio[:int(target_duration * 1000)]

        # Check how closely the generated audio matches the expected timing.
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration

        # Re-time the audio when the gap is significant.
        if abs(time_diff) > offset_threshold:
            if generated_duration > 0 and target_duration > 0:
                # change_playback_speed scales the frame rate by the factor, so
                # the new duration is generated / factor. Reaching the target
                # therefore needs factor = generated / target, not the inverse.
                speed_factor = generated_duration / target_duration
                print(f"Segment {group[0].index} : ajustement de vitesse appliqué, facteur={speed_factor:.3f}")
                group_audio = change_playback_speed(group_audio, speed_factor)
            else:
                # No audio to stretch (e.g. every phrase failed); avoid dividing by zero.
                print(f"[Warning] Segment {group[0].index} : aucun audio généré, ajustement de vitesse ignoré")

        # Pad with silence so the group starts at its subtitle start time.
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence

        combined_audio += group_audio

        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration avant ajustement:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_path, 'w', encoding='utf-8') as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")

    return output_audio_path



# ============== Merge Audio and Video Function ==============

def merge_audio_video():
    """Replace the audio of ``input_video`` with ``translated_audio``.

    When the translated audio is shorter than the video, a copy padded with
    trailing silence is written to ``temp_full_audio.wav`` in ``output_dir``
    and used instead. The result is encoded to ``output_video`` (H.264 video,
    AAC audio).

    The clips are closed once writing finishes or fails, so the underlying
    readers are released.

    Returns:
        The ``output_video`` path. The earlier variant of the main flow in
        this file assigns this function's result to the output path.
    """
    video = VideoFileClip(input_video)
    try:
        audio = AudioFileClip(translated_audio)
        try:
            # If the audio is shorter than video, append silence.
            if audio.duration < video.duration:
                extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
                audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
                audio_seg = AudioSegment.from_file(translated_audio, format="wav")
                full_audio = audio_seg + extra_silence
                full_audio.export(audio_path_temp, format="wav")
                # Release the short clip before switching to the padded one.
                audio.close()
                audio = AudioFileClip(audio_path_temp)

            dubbed_video = video.set_audio(audio)
            dubbed_video.write_videofile(
                output_video,
                codec="libx264",
                audio_codec="aac",
                temp_audiofile="temp-audio.m4a",
                remove_temp=True,
                threads=4
            )
        finally:
            audio.close()
    finally:
        video.close()
    return output_video


import asyncio
import edge_tts


async def synthesize_phrase_edge(
    phrase: str,
    output_path: str,
    voice: str = "fr-FR-DeniseNeural",
    rate: str = "+0%"
):
    """Synthesize ``phrase`` with edge-tts and save the audio to ``output_path``.

    Args:
        phrase: Text to speak.
        output_path: File path handed to ``Communicate.save``.
        voice: Voice name passed to edge-tts.
        rate: Speaking-rate string passed to edge-tts.
    """
    tts_options = {"text": phrase, "voice": voice, "rate": rate}
    await edge_tts.Communicate(**tts_options).save(output_path)



def change_playback_speed(sound, speed=1.0):
    """Return ``sound`` re-timed by the factor ``speed``.

    The raw samples are re-labelled with a frame rate of
    ``int(sound.frame_rate * speed)`` and the result is then converted back to
    the original frame rate.

    Args:
        sound: Audio segment to re-time.
        speed: Factor applied to the frame rate (1.0 leaves it unchanged).

    Returns:
        A new audio segment at the original frame rate.
    """
    original_rate = sound.frame_rate
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )
    return retimed.set_frame_rate(original_rate)




# ============== Main Flow ==============

if __name__ == "__main__":
    # Script entry point: extract -> transcribe -> subtitles -> dubbed audio -> merge.
    # Paths such as subtitle_file_en, translated_audio and output_video are the
    # module-level configuration values defined at the top of this cell.

    # Step 1: Extract audio.
    print("Extracting audio...")
    audio_path = extract_audio()
    
    # Step 2: Transcribe audio.
    print("Transcribing audio...")
    language, segments = transcribe(audio_path)
    
    # Step 3: Generate English subtitles.
    print("Generating English subtitles...")
    generate_subtitle_file(segments, subtitle_file_en)
    
    # (Optional) Step 4: Translate subtitles (if needed for other purposes).
    # In our flow, we now use the auto translation during review.
    # print("Translating subtitles...")
    # translate_subtitles(subtitle_file_en, subtitle_file_fr)
    
    # Step 5: Generate French audio using the review file.
    # The audio step itself creates the review file and waits for the user to
    # confirm it before synthesizing speech.
    print("Generating French audio with synchronization...")
    generate_translated_audio_with_sync_using_review(subtitle_file_en, translated_audio, debug_log_file, review_file)
    # The audio step has already printed the debug-log path; this repeats it.
    print(f"Debug log written to: {debug_log_file}")
    
    # Step 6: Merge audio and video.
    print("Merging audio and video...")
    merge_audio_video()
    
    print(f"Process completed! Output video: {output_video}")


In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from gtts import gTTS
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime   

# nest_asyncio patches asyncio to tolerate nested event loops; presumably needed
# because this code runs in a notebook cell where a loop is already running.
nest_asyncio.apply()

# --- Configuration ---
# Fail fast when the ffmpeg executable is not on PATH.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

input_video = "4.2.4_Configuration de la solution_Avr_10_Latest.mp4"  # Path to your input video
base_name = os.path.splitext(os.path.basename(input_video))[0]  # e.g. "4.2.4_Configuration de la solution_Avr_10_Latest"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")            # e.g. "20250414_173015"
# Every run writes into its own timestamped directory next to the notebook.
output_dir = f"{base_name}_run_{timestamp}"
model_size = "small"        # Whisper model size (tiny, base, small, medium, large)
# NOTE(review): update_existing is not read anywhere in the visible part of this cell.
update_existing = True      # Set to True to allow interactive review/edit of translations

# Files and paths
# All artefacts are named after the input video's base name and live in output_dir.
os.makedirs(output_dir, exist_ok=True)
input_video_name = os.path.splitext(os.path.basename(input_video))[0]  # same value as base_name
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
subtitle_file_fr = os.path.join(output_dir, f"{input_video_name}-french.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")


# ============== Helper Functions ==============

def extract_audio():
    """Pull the audio track out of ``input_video`` with ffmpeg.

    The audio is written to the module-level ``extracted_audio`` path as a
    mono, 16 kHz file, overwriting any existing file.

    Returns:
        The ``extracted_audio`` path.

    Raises:
        ffmpeg.Error: re-raised after ffmpeg's captured stdout and stderr
            have been printed.
    """
    stream = ffmpeg.input(input_video)
    stream = stream.output(extracted_audio, ac=1, ar=16000)  # mono and 16kHz
    stream = stream.overwrite_output()
    try:
        stream.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        # Surface ffmpeg's own output before propagating the failure.
        print('STDOUT:', err.stdout.decode('utf8'))
        print('STDERR:', err.stderr.decode('utf8'))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Run faster-whisper on ``audio_path`` and collect the segments.

    The model size comes from the module-level ``model_size``; the model is
    created for CPU with int8 compute.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A ``(language, transcript_segments)`` tuple, where ``language`` is the
        language reported by the model and ``transcript_segments`` is a list
        of dicts with ``"start"``, ``"end"`` and stripped ``"text"`` entries.
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    language = info.language
    print(f"Detected language: {language}")

    transcript_segments = [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]
    return language, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time offset in seconds to a ``pysrt.SubRipTime``.

    The offset is rounded once to whole milliseconds and then split into
    hours, minutes, seconds and milliseconds with integer arithmetic.
    Truncating the fractional part separately loses a millisecond to float
    error (for example 1.001 s would become 00:00:01,000).

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The equivalent ``pysrt.SubRipTime``.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3600000)
    minutes, remainder_ms = divmod(remainder_ms, 60000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return pysrt.SubRipTime(
        hours=hours,
        minutes=minutes,
        seconds=whole_seconds,
        milliseconds=milliseconds,
    )

def generate_subtitle_file(segments, output_path):
    """Write transcript segments to an SRT file.

    Args:
        segments: Iterable of dicts with ``"start"``, ``"end"`` (seconds) and
            ``"text"`` entries; items are numbered from 1 in iteration order.
        output_path: Destination path of the SRT file (saved as UTF-8).

    Returns:
        ``output_path``.
    """
    srt_file = pysrt.SubRipFile()
    for number, seg in enumerate(segments, start=1):
        srt_file.append(
            pysrt.SubRipItem(
                index=number,
                start=time_to_subrip(seg["start"]),
                end=time_to_subrip(seg["end"]),
                text=seg["text"],
            )
        )
    srt_file.save(output_path, encoding='utf-8')
    return output_path

# ============== Translation & Review Functions ==============

def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Write a human-editable translation review file and wait for confirmation.

    Subtitles read from ``source_path`` are grouped into sentences: a group is
    closed whenever a subtitle's text ends with '.', '!' or '?' (optionally
    followed by whitespace). Subtitles left over at the end form a last group.
    Each group is machine-translated and written as one "Segment" block with
    ``**Original:**``, ``**Auto Translated:**`` and ``**Final Translation:**``
    lines, followed by a ``---`` separator line.

    Whitespace (including line breaks) inside the original and translated text
    is collapsed to single spaces, so every field stays on one line. The
    parser in this file reads each field only up to the end of its line, so a
    multi-line subtitle would otherwise be truncated.

    After writing the file the function blocks on ``input()`` until the user
    types 'Y' (case-insensitive).

    Args:
        source_path: Path of the source SRT file.
        review_file_path: Path of the review file to (over)write, UTF-8.
        from_lang: Source language code passed to the translator.
        to_lang: Target language code passed to the translator.

    Returns:
        The list of subtitle groups (each a list of subtitle items) in the
        order they were written.
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    subs = pysrt.open(source_path)

    # Group subtitles by sentence using a simple punctuation detection.
    sentence_end_pattern = re.compile(r'[.!?]\s*$')
    groups = []
    current_group = []
    for sub in subs:
        current_group.append(sub)
        if sentence_end_pattern.search(sub.text):
            groups.append(current_group)
            current_group = []
    if current_group:
        groups.append(current_group)

    # Write the review file using the grouping information.
    with open(review_file_path, 'w', encoding='utf-8') as f:
        f.write("Translation Review File\n\n")
        for i, group in enumerate(groups, 1):
            group_start = group[0].start.ordinal / 1000
            group_end = group[-1].end.ordinal / 1000
            # Join the group's texts and collapse any embedded line breaks.
            original_text = " ".join(" ".join(sub.text for sub in group).split())
            # Auto-translate, then keep the result on a single line as well.
            auto_translated = " ".join(str(translator.translate(text=original_text)).split())
            # The auto translation is the default final translation.
            final_translation = auto_translated
            f.write(f"Segment {i} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n")
            f.write(f"**Original:** {original_text}\n")
            f.write(f"**Auto Translated:** {auto_translated}\n")
            f.write(f"**Final Translation:** {final_translation}\n")
            f.write("---\n\n")
    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations in the file as needed.")
    # Wait for user confirmation.
    while True:
        user_confirmation = input("Type 'Y' when ready to continue using the updated review file: ").strip().lower()
        if user_confirmation == "y":
            break
    return groups

def parse_final_translations(review_file_path):
    """Extract the final translation of every segment from the review file.

    The file is split into segment blocks at separator lines, i.e. lines that
    consist only of three or more dashes. Splitting on separator *lines*
    (rather than on any "---" substring) keeps translations that themselves
    contain "---" intact. Within each block the text after the first
    ``**Final Translation:**`` marker, up to the end of that line, is taken.

    Args:
        review_file_path: Path of the UTF-8 review file.

    Returns:
        A list of stripped final translations, in file order. Blocks without
        a ``**Final Translation:**`` marker are skipped.
    """
    with open(review_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    final_translations = []
    for block in re.split(r'^-{3,}[ \t]*$', content, flags=re.MULTILINE):
        # [ \t]* (not \s*) so an empty field cannot swallow the following line.
        match = re.search(r'\*\*Final Translation:\*\*[ \t]*(.*)', block)
        if match:
            final_translations.append(match.group(1).strip())
    return final_translations

# ============== Audio Synchronization Functions ==============

def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` so its length approaches ``target_duration``.

    Differences of at most 0.1 s in either direction are left untouched.

    Args:
        audio: Audio segment to adjust.
        target_duration: Desired duration in seconds.

    Returns:
        ``audio`` with trailing silence appended when it is more than 0.1 s
        too short, ``audio`` with its end cut off when it is more than 0.1 s
        too long, otherwise ``audio`` unchanged.
    """
    gap = target_duration - audio.duration_seconds

    if gap > 0.1:
        # Too short: fill the gap with silence (duration is in milliseconds).
        return audio + AudioSegment.silent(duration=gap * 1000)
    if gap < -0.1:
        # Too long: drop the excess from the end.
        excess_ms = int(abs(gap) * 1000)
        return audio[:-excess_ms]
    return audio


# ============== NEW: French Phrase Alignment Functions ==============


# Title abbreviations whose trailing period does not end a sentence.
_FRENCH_TITLE_ABBREVIATIONS = frozenset({"M.", "Mr.", "Mme.", "Ms.", "Mrs.", "Mmes."})


def split_french_phrases(text):
    """Split French text into sentence-like phrases.

    This single definition replaces two same-named definitions. The earlier
    one was shadowed by the later one and used a variable-width look-behind,
    which Python's ``re`` module rejects. Its intent, not splitting after
    title abbreviations such as "M.", is kept here with a plain lookup.

    The text is split on whitespace. A phrase ends after a word whose last
    character is '.', '!' or '?', provided that the word is not a title
    abbreviation and that it is either the last word or is followed by a word
    starting with an uppercase letter.

    Args:
        text: The text to split.

    Returns:
        A list of phrases, each with its words joined by single spaces.
        Empty or whitespace-only text yields an empty list.
    """
    words = text.split()
    last_index = len(words) - 1
    phrases = []
    current = []

    for i, word in enumerate(words):
        current.append(word)
        # str.split() never yields empty words, so word[-1] is always valid.
        if word[-1] not in ".!?" or word in _FRENCH_TITLE_ABBREVIATIONS:
            continue
        if i == last_index or words[i + 1][0].isupper():
            phrases.append(" ".join(current))
            current = []

    # Flush a trailing phrase that did not end with sentence punctuation.
    if current:
        phrases.append(" ".join(current))
    return phrases



def calculate_phrase_weights(original_text, translated_phrases):
    """Return the share of a segment's duration to give to each phrase.

    Weights are proportional to each translated phrase's word count.

    Args:
        original_text: Source-language text of the segment. It is accepted
            for interface compatibility and not used in the calculation.
        translated_phrases: List of translated phrases.

    Returns:
        A list of floats, one per phrase. It is empty when there are no
        phrases (previously this raised ZeroDivisionError), and holds equal
        weights when every phrase has zero words.
    """
    if not translated_phrases:
        return []

    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)

    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)

    # Each weight is the phrase's fraction of the total word count.
    return [count / total_words for count in word_counts]


# ============== MODIFIED Audio Generat
    
  
import tempfile

# ============== MODIFIED Audio Generation Function ==============


import os
import asyncio
import tempfile
import pysrt
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError




def generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path):
    """Build the translated audio track from the reviewed translations.

    Steps:
      1. Create the review file (this blocks until the user confirms it) and
         read back the final translation of every subtitle group.
      2. For each group, split the translation into phrases, synthesize each
         phrase to a temporary MP3, and pad/trim it to its weighted share of
         the group's duration.
      3. Concatenate the phrases (100 ms of silence after each), truncate to
         the group's duration, and re-time the result when it still differs
         from the target by more than ``offset_threshold``.
      4. Place each group at its subtitle start time in the combined track,
         write a debug log, and export the track as WAV.

    Args:
        subtitle_source_path: Path of the source SRT file.
        output_audio_path: Path of the WAV file to write.
        debug_log_path: Path of the UTF-8 debug log to write.
        review_file_path: Path of the review file to create and read.

    Returns:
        ``output_audio_path``.

    Raises:
        ValueError: if the review file yields fewer final translations than
            there are subtitle groups. This is checked before any speech is
            synthesized.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    final_translations = parse_final_translations(review_file_path)

    # Fail fast: every group needs a translation, and synthesis is slow.
    if len(final_translations) < len(groups):
        raise ValueError(
            f"Review file {review_file_path} contains {len(final_translations)} final "
            f"translation(s) but {len(groups)} segment(s) are expected."
        )

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []

    # Threshold (in seconds) above which a timing gap is corrected.
    offset_threshold = 0.05  # e.g. 50 ms; tune from test runs

    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start

        original_text = " ".join([sub.text for sub in group])
        final_translation = final_translations[idx]

        french_phrases = split_french_phrases(final_translation)
        weights = calculate_phrase_weights(original_text, french_phrases)

        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            # Use an .mp3 extension because the file is loaded with from_mp3 below.
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")

            try:
                asyncio.run(synthesize_phrase_edge(
                    phrase, temp_path, voice="fr-FR-DeniseNeural", rate="+0%"
                ))

                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44:
                    try:
                        audio = AudioSegment.from_mp3(temp_path)
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        # Best effort: an undecodable phrase is skipped, not fatal.
                        print(f"[Warning] Audio corrompu ignoré : {temp_path}. Erreur: {e}")
                else:
                    print(f"[Warning] Fichier manquant ou invalide: {temp_path}")
            finally:
                # Always remove the temporary phrase file.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=100)

        # Truncate when the assembled audio runs past the group's duration.
        group_audio = group_audio[:int(target_duration * 1000)]

        # Check how closely the generated audio matches the expected timing.
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration

        # Re-time the audio when the gap is significant.
        if abs(time_diff) > offset_threshold:
            if generated_duration > 0 and target_duration > 0:
                # change_playback_speed scales the frame rate by the factor, so
                # the new duration is generated / factor. Reaching the target
                # therefore needs factor = generated / target, not the inverse.
                speed_factor = generated_duration / target_duration
                print(f"Segment {group[0].index} : ajustement de vitesse appliqué, facteur={speed_factor:.3f}")
                group_audio = change_playback_speed(group_audio, speed_factor)
            else:
                # No audio to stretch (e.g. every phrase failed); avoid dividing by zero.
                print(f"[Warning] Segment {group[0].index} : aucun audio généré, ajustement de vitesse ignoré")

        # Pad with silence so the group starts at its subtitle start time.
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence

        combined_audio += group_audio

        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration avant ajustement:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_path, 'w', encoding='utf-8') as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")

    return output_audio_path



# ============== Merge Audio and Video Function ==============

def merge_audio_video():
    """Merge the newly generated audio with the original video.

    Reads the module-level paths ``input_video``, ``translated_audio``,
    ``output_dir`` and ``output_video``. When the translated audio is shorter
    than the video, a silence-padded copy (``temp_full_audio.wav`` inside
    ``output_dir``) is written and used in its place.

    Every clip opened here is closed before returning, including when
    encoding fails, so moviepy can release the resources it holds for them.
    """
    video = VideoFileClip(input_video)
    open_clips = [video]
    try:
        audio = AudioFileClip(translated_audio)
        open_clips.append(audio)

        # If the audio is shorter than the video, append trailing silence.
        if audio.duration < video.duration:
            extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
            audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
            audio_seg = AudioSegment.from_file(translated_audio, format="wav")
            (audio_seg + extra_silence).export(audio_path_temp, format="wav")
            audio = AudioFileClip(audio_path_temp)
            open_clips.append(audio)

        # set_audio() returns a new clip; the source clips stay tracked in
        # open_clips so they can be closed afterwards.
        video.set_audio(audio).write_videofile(
            output_video,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile="temp-audio.m4a",
            remove_temp=True,
            threads=4
        )
    finally:
        for clip in open_clips:
            clip.close()


import asyncio
import edge_tts


async def synthesize_phrase_edge(
    phrase: str,
    output_path: str,
    voice: str = "fr-FR-DeniseNeural",
    rate: str = "+0%"
):
    """Synthesize ``phrase`` with Edge TTS and save the audio to ``output_path``.

    ``voice`` and ``rate`` are forwarded unchanged to ``edge_tts.Communicate``.
    """
    tts_request = edge_tts.Communicate(text=phrase, voice=voice, rate=rate)
    await tts_request.save(output_path)



def change_playback_speed(sound, speed=1.0):
    """Return ``sound`` re-timed by the factor ``speed``.

    The raw samples are re-labelled with a frame rate scaled by ``speed``,
    then the result is converted back to the original frame rate.
    """
    original_rate = sound.frame_rate
    relabelled = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(original_rate * speed)},
    )
    return relabelled.set_frame_rate(original_rate)




# ============== Main Flow ==============

# Script entry point: runs the pipeline steps strictly in order, because each
# step consumes the files or values produced by the previous one.
if __name__ == "__main__":
    # Step 1: Extract the audio track from the input video.
    print("Extracting audio...")
    audio_path = extract_audio()

    # Step 2: Transcribe the extracted audio into timed text segments.
    print("Transcribing audio...")
    language, segments = transcribe(audio_path)

    # Step 3: Write the transcript as the English subtitle file.
    print("Generating English subtitles...")
    generate_subtitle_file(segments, subtitle_file_en)

    # (Optional) Step 4: Translate subtitles (if needed for other purposes).
    # In this flow the automatic translation happens during the review step,
    # so the standalone translation call is left disabled.
    # print("Translating subtitles...")
    # translate_subtitles(subtitle_file_en, subtitle_file_fr)

    # Step 5: Generate the French audio using the review file.
    print("Generating French audio with synchronization...")
    generate_translated_audio_with_sync_using_review(subtitle_file_en, translated_audio, debug_log_file, review_file)
    print(f"Debug log written to: {debug_log_file}")

    # Step 6: Merge the generated audio with the original video.
    print("Merging audio and video...")
    merge_audio_video()

    print(f"Process completed! Output video: {output_video}")


UPDATE AFTER VIEWING VIDEO: adjust speed and silence
Translation Review File
You can update the following properties for each segment:
  **Final Translation:** Your updated French text
  **Voice Speed:** Rate modifier such as '+0%', '+10%', '-5%', etc. (default '+0%')
  **Silence Duration:** Silence (in ms) to append (default 100 ms)
---

In [None]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
from aiohttp import ClientConnectorError

# Applied before any event loop is started below; presumably so that
# asyncio.run() also works inside an already-running loop (e.g. a notebook).
nest_asyncio.apply()

# --- Configuration ---
# Abort early when no ffmpeg executable can be located.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video; every output file name below is derived from its base name.
input_video = "4.2.4_Configuration de la solution_Avr_10_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
# Each run writes into its own timestamped directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Model size handed to WhisperModel in transcribe().
model_size = "small"
# NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
update_existing = True

# Files and paths
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name (computed with the same expression).
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """Extract the audio track of ``input_video`` into ``extracted_audio``.

    The output is requested with ``ac=1`` and ``ar=16000`` and overwrites any
    existing file. When ffmpeg fails, its captured stdout and stderr are
    printed and the error is re-raised.

    Returns:
        The module-level ``extracted_audio`` path.
    """
    pipeline = ffmpeg.input(input_video)
    pipeline = pipeline.output(extracted_audio, ac=1, ar=16000)
    pipeline = pipeline.overwrite_output()
    try:
        pipeline.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        print('STDOUT:', err.stdout.decode('utf8'))
        print('STDERR:', err.stderr.decode('utf8'))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Transcribe ``audio_path`` with a faster-whisper model.

    The model is built from the module-level ``model_size`` with
    ``device="cpu"`` and ``compute_type="int8"``.

    Returns:
        A ``(language, segments)`` tuple where ``language`` is the language
        reported by the model and ``segments`` is a list of dicts with the
        keys ``"start"``, ``"end"`` and ``"text"`` (text stripped).
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    detected_language = info.language
    print(f"Detected language: {detected_language}")

    transcript_segments = [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]
    return detected_language, transcript_segments

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time offset in seconds to a ``pysrt.SubRipTime``.

    The value is rounded to the nearest millisecond before being split into
    hours, minutes, seconds and milliseconds. Truncating the fractional part
    instead loses one millisecond whenever the float sits just below the
    intended value (e.g. ``2.3`` has a fractional part of ``0.2999...``,
    which would become 299 ms rather than 300 ms).
    """
    total_ms = int(round(float(seconds) * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, whole_seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=whole_seconds, milliseconds=milliseconds)

def generate_subtitle_file(segments, output_path):
    """Write ``segments`` as a SubRip file at ``output_path``.

    Each segment is a dict with ``"start"``, ``"end"`` and ``"text"`` keys;
    items are numbered from 1 in input order. Returns ``output_path``.
    """
    srt_file = pysrt.SubRipFile()
    for number, entry in enumerate(segments, start=1):
        begin = time_to_subrip(entry["start"])
        finish = time_to_subrip(entry["end"])
        srt_file.append(
            pysrt.SubRipItem(index=number, start=begin, end=finish, text=entry["text"])
        )
    srt_file.save(output_path, encoding='utf-8')
    return output_path

# ============== Translation & Review Functions ==============
def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Write the translation review file and wait for the user to confirm it.

    Subtitles read from ``source_path`` are grouped so that a group ends at
    the first subtitle whose text ends with ``.``, ``!`` or ``?``; leftover
    subtitles form a final group. For every group the joined text is
    machine-translated and written to ``review_file_path`` together with the
    default voice speed and silence duration. The function then prompts on
    stdin until the user answers ``y``.

    Returns:
        The list of subtitle groups (each a list of subtitle items).
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r'[.!?]\s*$')

    groups = []
    pending = []
    for sub in pysrt.open(source_path):
        pending.append(sub)
        if ends_sentence.search(sub.text):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    header = (
        "Translation Review File\n"
        "You can update the following properties for each segment:\n"
        "  **Final Translation:** Your updated French text\n"
        "  **Voice Speed:** Rate modifier such as '+0%', '+10%', '-5%', etc. (default '+0%')\n"
        "  **Silence Duration:** Silence (in ms) to append (default 100 ms)\n"
        "---\n\n"
    )
    default_voice_speed = "+0%"
    default_silence = "100"

    with open(review_file_path, 'w', encoding='utf-8') as review:
        review.write(header)
        for number, group in enumerate(groups, 1):
            start_s = group[0].start.ordinal / 1000
            end_s = group[-1].end.ordinal / 1000
            source_text = " ".join(sub.text for sub in group)
            machine_translation = translator.translate(text=source_text)
            review.write(
                f"Segment {number} (start: {start_s:.2f}s, end: {end_s:.2f}s):\n"
                f"**Original:** {source_text}\n"
                f"**Auto Translated:** {machine_translation}\n"
                f"**Final Translation:** {machine_translation}\n"
                f"**Voice Speed:** {default_voice_speed}\n"
                f"**Silence Duration:** {default_silence}\n"
                "---\n\n"
            )

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations and the additional properties as needed.")
    answer = ""
    while answer != "y":
        answer = input("Type 'Y' when ready to continue using the updated review file: ").strip().lower()
    return groups

def parse_review_overrides(review_file_path):
    """Read the per-segment overrides back from the review file.

    The file is split into blocks at separator lines made only of dashes.
    Within a block, lines starting with ``**Final Translation:**``,
    ``**Voice Speed:**`` and ``**Silence Duration:**`` supply the values.
    Blocks without a ``**Final Translation:**`` line (such as the header,
    whose sample lines are indented) are ignored.

    Returns:
        A list of dicts, in file order, with the keys ``"final_translation"``
        (str), ``"voice_speed"`` (str, default ``"+0%"``) and
        ``"silence_duration"`` (float, default ``100.0``; also used when the
        value is not a number).
    """
    segments_overrides = []
    with open(review_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split only on lines consisting solely of three or more dashes. Splitting
    # on every '---' substring would also cut through a translation that
    # happens to contain three dashes, breaking that segment apart.
    separator = re.compile(r'^[ \t]*-{3,}[ \t]*$', re.MULTILINE)
    blocks = [blk.strip() for blk in separator.split(content) if blk.strip()]

    for blk in blocks:
        final_translation = None
        voice_speed = "+0%"
        silence_duration = 100.0  # default, in ms
        for line in blk.splitlines():
            # Lines are deliberately not stripped: the header's indented
            # sample lines must not be mistaken for real values.
            if line.startswith("**Final Translation:**"):
                final_translation = line.split("**Final Translation:**", 1)[1].strip()
            elif line.startswith("**Voice Speed:**"):
                voice_speed = line.split("**Voice Speed:**", 1)[1].strip()
            elif line.startswith("**Silence Duration:**"):
                try:
                    silence_duration = float(line.split("**Silence Duration:**", 1)[1].strip())
                except ValueError:
                    silence_duration = 100.0
        if final_translation is not None:
            segments_overrides.append({
                "final_translation": final_translation,
                "voice_speed": voice_speed,
                "silence_duration": silence_duration
            })
    return segments_overrides

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` towards ``target_duration`` (in seconds).

    More than 0.1 s too short: silence is appended to make up the difference.
    More than 0.1 s too long: the excess is cut from the end.
    Otherwise ``audio`` is returned unchanged.
    """
    shortfall = target_duration - audio.duration_seconds

    if shortfall > 0.1:
        return audio + AudioSegment.silent(duration=shortfall * 1000)
    if shortfall < -0.1:
        excess_ms = int(abs(shortfall) * 1000)
        return audio[:-excess_ms]
    return audio

# ============== French Phrase Alignment Functions ==============
def split_french_phrases(text):
    """Split ``text`` into phrases at sentence boundaries.

    A boundary is a whitespace-separated word ending in ``.``, ``!`` or ``?``
    that is either the last word or followed by a word starting with an
    uppercase letter. Words are re-joined with single spaces; any trailing
    words without a boundary form a final phrase.
    """
    tokens = text.split()
    followers = tokens[1:] + [None]
    sentence_end = re.compile(r'[.!?]$')

    phrases = []
    pending = []
    for token, upcoming in zip(tokens, followers):
        pending.append(token)
        if not sentence_end.search(token):
            continue
        if upcoming is None or upcoming[0].isupper():
            phrases.append(" ".join(pending))
            pending = []

    if pending:
        phrases.append(" ".join(pending))
    return phrases

def calculate_phrase_weights(original_text, translated_phrases):
    """Return each phrase's share of the total word count.

    Args:
        original_text: Unused; kept so existing callers keep working.
        translated_phrases: List of phrase strings.

    Returns:
        A list of floats, one per phrase, summing to 1 for non-empty input.
        If no phrase contains any word, the weight is shared equally. An
        empty phrase list yields an empty list (previously this raised
        ``ZeroDivisionError``).
    """
    if not translated_phrases:
        return []

    word_counts = [len(phrase.split()) for phrase in translated_phrases]
    total_words = sum(word_counts)

    if total_words == 0:
        return [1 / len(translated_phrases)] * len(translated_phrases)

    return [count / total_words for count in word_counts]

# ============== TTS Functions ==============
async def synthesize_phrase_edge_hybrid(
    phrase: str,
    output_path: str,
    connector: aiohttp.TCPConnector,
    voice: str = "fr-FR-DeniseNeural",
    rate: str = "+0%"
):
    """Synthesize ``phrase`` with Edge TTS, retrying on failure.

    Up to three attempts are made, waiting 1 s and then 2 s between them.
    The exception from the last attempt is re-raised.

    Args:
        phrase: Text to speak.
        output_path: Where the synthesized audio is saved.
        connector: Shared aiohttp connector handed to ``edge_tts.Communicate``.
        voice: Edge TTS voice name.
        rate: Edge TTS rate modifier such as ``"+0%"``.
    """
    max_retries = 3
    delay_seconds = 1
    for attempt in range(max_retries):
        try:
            # No aiohttp.ClientSession is opened here: the previous one was
            # never used for the request, and a session closes its connector
            # on exit by default, which would close the shared connector.
            communicate = edge_tts.Communicate(
                text=phrase,
                voice=voice,
                rate=rate,
                connector=connector
            )
            await communicate.save(output_path)
            return
        except Exception as e:
            print(f"[Error] Hybrid TTS synthesis failed for phrase: '{phrase}' on attempt {attempt+1}/{max_retries}: {e}")
            if attempt == max_retries - 1:
                # Bare raise keeps the original traceback.
                raise
            await asyncio.sleep(delay_seconds)
            delay_seconds *= 2

def change_playback_speed(sound, speed=1.0):
    """Return ``sound`` re-timed by the factor ``speed``.

    The raw samples are re-labelled with a frame rate scaled by ``speed``,
    then the result is converted back to the original frame rate.
    """
    base_rate = sound.frame_rate
    scaled_rate = int(base_rate * speed)
    retimed = sound._spawn(sound.raw_data, overrides={"frame_rate": scaled_rate})
    return retimed.set_frame_rate(base_rate)

# ============== Persistent Connector Creation ==============
def create_persistent_connector():
    """Create the aiohttp TCP connector shared by the TTS calls.

    It uses the default SSL context and ``limit=10``.
    """
    return aiohttp.TCPConnector(ssl=ssl.create_default_context(), limit=10)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_path, review_file_path, persistent_connector):
    """Build the translated audio track, timed against the subtitle groups.

    Steps:
      1. Create the review file (this blocks until the user confirms) and
         read the per-segment overrides back from it.
      2. For each subtitle group, split the final translation into phrases,
         synthesize each phrase, fit it to its share of the group's duration
         and join the phrases with the configured silence.
      3. Place the group at its start time in the combined track, inserting
         silence when the track is still shorter than that start time.
      4. Write the debug log and export the combined track as WAV.

    Args:
        subtitle_source_path: Path of the source subtitle (.srt) file.
        output_audio_path: Path of the WAV file to write.
        debug_log_path: Path of the debug log to write.
        review_file_path: Path of the review file to create and read back.
        persistent_connector: Connector forwarded to the TTS helper.

    Returns:
        ``output_audio_path``.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)

    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []

    # Timing differences (in seconds) above this trigger a speed adjustment.
    offset_threshold = 0.05

    for idx, group in enumerate(groups):
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start

        original_text = " ".join([sub.text for sub in group])
        # Overrides are matched to groups by position; groups without an
        # override fall back to the original text and the default settings.
        final_translation = overrides[idx]["final_translation"] if idx < len(overrides) else original_text
        voice_speed_override = overrides[idx]["voice_speed"] if idx < len(overrides) else "+0%"
        silence_duration_override = overrides[idx]["silence_duration"] if idx < len(overrides) else 100.0

        french_phrases = split_french_phrases(final_translation)
        weights = calculate_phrase_weights(original_text, french_phrases)

        phrase_audios = []
        for i, phrase in enumerate(french_phrases):
            phrase_duration = target_duration * weights[i]
            temp_path = os.path.join(tempfile.gettempdir(), f"temp_phrase_{idx}_{i}.mp3")
            try:
                await synthesize_phrase_edge_hybrid(
                    phrase, temp_path, connector=persistent_connector, voice="fr-FR-DeniseNeural", rate=voice_speed_override
                )
                if os.path.exists(temp_path) and os.path.getsize(temp_path) > 44:
                    try:
                        audio = AudioSegment.from_mp3(temp_path)
                        audio = adjust_audio_duration(audio, phrase_duration)
                        phrase_audios.append(audio)
                    except Exception as e:
                        print(f"[Warning] Ignoring corrupted audio file: {temp_path}. Error: {e}")
                else:
                    print(f"[Warning] Missing or invalid file: {temp_path}")
            finally:
                # The temporary file is removed whether or not synthesis succeeded.
                if os.path.exists(temp_path):
                    os.remove(temp_path)

        group_audio = AudioSegment.silent(duration=0)
        for audio in phrase_audios:
            group_audio += audio
            group_audio += AudioSegment.silent(duration=silence_duration_override)

        # Never let a group run past its own time slot.
        group_audio = group_audio[:int(target_duration * 1000)]
        generated_duration = group_audio.duration_seconds
        time_diff = target_duration - generated_duration

        if generated_duration <= 0:
            # Nothing usable was synthesized (empty translation or every
            # phrase file rejected). Dividing by the zero duration below
            # would raise ZeroDivisionError, so the slot is left silent.
            print(f"[Warning] Segment {group[0].index}: no audio was generated; the segment stays silent.")
        elif abs(time_diff) > offset_threshold:
            # NOTE(review): after the truncation above the audio can only be
            # shorter than the target, so this factor is > 1, which shortens
            # it further; generated/target may have been intended — confirm.
            speed_factor = target_duration / generated_duration
            print(f"Segment {group[0].index} : adjusting speed, factor={speed_factor:.3f}")
            group_audio = change_playback_speed(group_audio, speed_factor)

        # Pad the combined track with silence up to the group's start time.
        required_start_ms = int(group_start * 1000)
        current_duration_ms = len(combined_audio)
        if required_start_ms > current_duration_ms:
            silence = AudioSegment.silent(duration=required_start_ms - current_duration_ms)
            combined_audio += silence

        combined_audio += group_audio

        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed:** {voice_speed_override}\n"
            f"**Silence Duration:** {silence_duration_override} ms\n"
            f"**French Phrases:** {french_phrases}\n"
            f"**Phrase Weights:** {weights}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            f"**Generated Duration before adjustment:** {generated_duration:.2f}s\n"
            f"**Time Diff:** {time_diff:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_path, 'w', encoding='utf-8') as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)

    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_path}")

    return output_audio_path

# ============== Merge Audio and Video Function (unchanged) ==============
def merge_audio_video():
    """Merge the generated audio with the original video.

    Reads the module-level paths ``input_video``, ``translated_audio``,
    ``output_dir`` and ``output_video``. When the translated audio is shorter
    than the video, a silence-padded copy (``temp_full_audio.wav`` inside
    ``output_dir``) is written and used in its place.

    Every clip opened here is closed before returning, including when
    encoding fails, so moviepy can release the resources it holds for them.
    """
    video = VideoFileClip(input_video)
    open_clips = [video]
    try:
        audio = AudioFileClip(translated_audio)
        open_clips.append(audio)

        # If the audio is shorter than the video, append trailing silence.
        if audio.duration < video.duration:
            extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
            audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
            audio_seg = AudioSegment.from_file(translated_audio, format="wav")
            (audio_seg + extra_silence).export(audio_path_temp, format="wav")
            audio = AudioFileClip(audio_path_temp)
            open_clips.append(audio)

        # set_audio() returns a new clip; the source clips stay tracked in
        # open_clips so they can be closed afterwards.
        video.set_audio(audio).write_videofile(
            output_video,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile="temp-audio.m4a",
            remove_temp=True,
            threads=4
        )
    finally:
        for clip in open_clips:
            clip.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """Run the whole pipeline: extract, transcribe, subtitle, dub and merge.

    A single connector is created up front, passed to the audio generation
    step, and closed when the pipeline ends, whether it succeeded or failed.
    """
    shared_connector = create_persistent_connector()
    try:
        print("Extracting audio...")
        wav_path = extract_audio()

        print("Transcribing audio...")
        _detected_language, transcript = transcribe(wav_path)

        print("Generating English subtitles...")
        generate_subtitle_file(transcript, subtitle_file_en)

        print("Generating French audio with synchronization and manual overrides...")
        await async_generate_translated_audio_with_sync_using_review(
            subtitle_file_en,
            translated_audio,
            debug_log_file,
            review_file,
            shared_connector,
        )

        print("Merging audio and video...")
        merge_audio_video()

        print(f"Process completed! Output video: {output_video}")
    finally:
        await shared_connector.close()

# Script entry point: run the asynchronous pipeline to completion.
if __name__ == "__main__":
    asyncio.run(async_main())


UPDATE AFTER VIEWING VIDEO: adjust speed and silence per word or phrase
Translation Review File
You can update the following properties for each segment:
  **Final Translation:** Your updated French text
  **Voice Speed:** Rate modifier such as '+0%', '+10%', '-5%', etc. (default '+0%')
  **Silence Duration:** Silence (in ms) to append (default 100 ms)
---

In [1]:
import os
import re
import ffmpeg
import pysrt
import time
from deep_translator import GoogleTranslator
from pydub import AudioSegment
from moviepy.editor import VideoFileClip, AudioFileClip
from faster_whisper import WhisperModel
from shutil import which
import nest_asyncio
from datetime import datetime
import tempfile
import asyncio
import edge_tts
import aiohttp
import ssl
import random
import concurrent.futures

# Applied before any event loop is started below; presumably so that
# asyncio.run() also works inside an already-running loop (e.g. a notebook).
nest_asyncio.apply()

# ----- Configuration -----
# Abort early when no ffmpeg executable can be located.
ffmpeg_path = which("ffmpeg")
if not ffmpeg_path:
    raise RuntimeError("ffmpeg not found. Please install ffmpeg first.")
print(f"✅ ffmpeg found at: {ffmpeg_path}")

# Source video; every output file name below is derived from its base name.
input_video = "to translate/4.2.2_Flux de navigation_Avr_08_Latest.mp4"
base_name = os.path.splitext(os.path.basename(input_video))[0]
# Each run writes into its own timestamped directory.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"{base_name}_run_{timestamp}"
# Model size handed to WhisperModel in transcribe().
model_size = "small"
# NOTE(review): not referenced by the code visible in this cell — confirm it is still needed.
update_existing = True

# We rely solely on cloud-based Edge TTS.
# NOTE(review): this flag is not read by the code visible in this cell — confirm.
USE_EDGE_TTS = True

# Files and paths
os.makedirs(output_dir, exist_ok=True)
# Same value as base_name (computed with the same expression).
input_video_name = os.path.splitext(os.path.basename(input_video))[0]
extracted_audio = os.path.join(output_dir, f"{input_video_name}-extracted-audio.wav")
subtitle_file_en = os.path.join(output_dir, f"{input_video_name}-english.srt")
translated_audio = os.path.join(output_dir, f"{input_video_name}-french.wav")
output_video = os.path.join(output_dir, f"{input_video_name}-french.mp4")
review_file = os.path.join(output_dir, "translation_review.txt")
debug_log_file = os.path.join(output_dir, "translation_debug_log.txt")

# ============== Helper Functions (extract_audio, transcribe, etc.) ==============
def extract_audio():
    """Extract the audio track of ``input_video`` into ``extracted_audio``.

    The output is requested with ``ac=1`` and ``ar=16000`` and overwrites any
    existing file. When ffmpeg fails, its captured stdout and stderr are
    printed and the error is re-raised.

    Returns:
        The module-level ``extracted_audio`` path.
    """
    job = ffmpeg.input(input_video).output(extracted_audio, ac=1, ar=16000)
    job = job.overwrite_output()
    try:
        job.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as err:
        print("STDOUT:", err.stdout.decode("utf8"))
        print("STDERR:", err.stderr.decode("utf8"))
        raise
    return extracted_audio

def transcribe(audio_path):
    """Transcribe ``audio_path`` with a faster-whisper model.

    The model is built from the module-level ``model_size`` with
    ``device="cpu"`` and ``compute_type="int8"``.

    Returns:
        A ``(language, segments)`` tuple where ``language`` is the language
        reported by the model and ``segments`` is a list of dicts with the
        keys ``"start"``, ``"end"`` and ``"text"`` (text stripped).
    """
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    raw_segments, info = whisper.transcribe(audio_path, beam_size=5)
    detected_language = info.language
    print(f"Detected language: {detected_language}")
    return detected_language, [
        {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
        for seg in raw_segments
    ]

def time_to_subrip(seconds: float) -> pysrt.SubRipTime:
    """Convert a time offset in seconds to a ``pysrt.SubRipTime``.

    The value is rounded to the nearest millisecond before being split into
    hours, minutes, seconds and milliseconds. Truncating the fractional part
    instead loses one millisecond whenever the float sits just below the
    intended value (e.g. ``2.3`` has a fractional part of ``0.2999...``,
    which would become 299 ms rather than 300 ms).
    """
    total_ms = int(round(float(seconds) * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, whole_seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=whole_seconds, milliseconds=milliseconds)

def generate_subtitle_file(segments, output_path):
    """Write ``segments`` as a SubRip file at ``output_path``.

    Each segment is a dict with ``"start"``, ``"end"`` and ``"text"`` keys;
    items are numbered from 1 in input order. Returns ``output_path``.
    """
    srt_file = pysrt.SubRipFile()
    for number, entry in enumerate(segments, start=1):
        begin = time_to_subrip(entry["start"])
        finish = time_to_subrip(entry["end"])
        srt_file.append(
            pysrt.SubRipItem(index=number, start=begin, end=finish, text=entry["text"])
        )
    srt_file.save(output_path, encoding="utf-8")
    return output_path

# ============== Translation & Review Functions ==============
def generate_translation_review_file(source_path, review_file_path, from_lang="en", to_lang="fr"):
    """Write the translation review file and wait for the user to confirm it.

    Subtitles read from ``source_path`` are grouped so that a group ends at
    the first subtitle whose text ends with ``.``, ``!`` or ``?``; leftover
    subtitles form a final group. For every group the joined text is
    machine-translated and written to ``review_file_path`` together with the
    default voice speed and silence duration, each segment followed by a
    dashed separator line. The function then prompts on stdin until the user
    answers ``y``.

    Returns:
        The list of subtitle groups (each a list of subtitle items).
    """
    translator = GoogleTranslator(source=from_lang, target=to_lang)
    ends_sentence = re.compile(r"[.!?]\s*$")

    groups = []
    pending = []
    for sub in pysrt.open(source_path):
        pending.append(sub)
        if ends_sentence.search(sub.text):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    separator_line = "----------------------------------------------------------------\n"
    default_voice_speed = "+0%"
    default_silence = "100"

    with open(review_file_path, "w", encoding="utf-8") as review:
        review.write(
            "Translation Review File\n"
            "Please update the French text in the **Final Translation:** field below.\n"
            "DO NOT change the keys (**Final Translation:**, **Voice Speed:**, **Silence Duration:**).\n"
            + separator_line
        )
        for number, group in enumerate(groups, 1):
            start_s = group[0].start.ordinal / 1000
            end_s = group[-1].end.ordinal / 1000
            source_text = " ".join(sub.text for sub in group)
            machine_translation = translator.translate(text=source_text)
            review.write(
                f"Segment {number} (start: {start_s:.2f}s, end: {end_s:.2f}s):\n"
                f"**Original:** {source_text}\n"
                f"**Auto Translated:** {machine_translation}\n"
                f"**Final Translation:** {machine_translation}\n"
                f"**Voice Speed:** {default_voice_speed}\n"
                f"**Silence Duration:** {default_silence}\n"
                + separator_line
            )

    print(f"Review file created at: {review_file_path}")
    print("Please review and update the final translations as needed.")
    answer = ""
    while answer != "y":
        answer = input("Type 'Y' when ready to continue: ").strip().lower()
    return groups

def parse_review_overrides(review_file_path):
    """Read the per-segment overrides back from the review file.

    The file is split into blocks at separator lines made only of dashes.
    Within a block, lines starting with ``**Final Translation:**``,
    ``**Voice Speed:**`` and ``**Silence Duration:**`` supply the values.
    Blocks without a ``**Final Translation:**`` line (such as the header)
    are ignored.

    A segment whose final translation was left blank is still returned, with
    an empty string. Callers match overrides to segments by position, so
    dropping it would shift every later override onto the wrong segment.

    Returns:
        A list of dicts, in file order, with the keys ``"final_translation"``
        (str), ``"voice_speed"`` (str, default ``"+0%"``) and
        ``"silence_duration"`` (float, default ``100.0``; also used when the
        value is not a number).
    """
    segments_overrides = []
    with open(review_file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # A separator is any line made only of three or more dashes, so a line
    # whose dash count was changed while editing still separates segments.
    separator = re.compile(r"^[ \t]*-{3,}[ \t]*$", re.MULTILINE)
    blocks = [blk.strip() for blk in separator.split(content) if blk.strip()]

    for blk in blocks:
        final_translation = None
        voice_speed = "+0%"
        silence_duration = 100.0  # default in ms
        for line in blk.splitlines():
            if line.startswith("**Final Translation:**"):
                final_translation = line.split("**Final Translation:**", 1)[1].strip()
            elif line.startswith("**Voice Speed:**"):
                voice_speed = line.split("**Voice Speed:**", 1)[1].strip()
            elif line.startswith("**Silence Duration:**"):
                try:
                    silence_duration = float(line.split("**Silence Duration:**", 1)[1].strip())
                except ValueError:
                    silence_duration = 100.0
        # "is not None" rather than truthiness: keep blank translations so
        # list positions stay aligned with the segment order.
        if final_translation is not None:
            segments_overrides.append({
                "final_translation": final_translation,
                "voice_speed": voice_speed,
                "silence_duration": silence_duration
            })
    print("Parsed review file overrides:")
    for idx, override in enumerate(segments_overrides, 1):
        print(f"  Segment {idx} final translation: {override['final_translation']}")
    return segments_overrides

# ============== Audio Synchronization Functions ==============
def adjust_audio_duration(audio, target_duration):
    """Pad or trim ``audio`` towards ``target_duration`` (in seconds).

    More than 0.1 s too short: silence is appended to make up the difference.
    More than 0.1 s too long: the excess is cut from the end.
    Otherwise ``audio`` is returned unchanged.
    """
    gap = target_duration - audio.duration_seconds
    if gap > 0.1:
        padding = AudioSegment.silent(duration=gap * 1000)
        return audio + padding
    if gap < -0.1:
        surplus_ms = int(abs(gap) * 1000)
        return audio[:-surplus_ms]
    return audio

# ============== Inline Tag Parsing and SSML Generation ==============
def parse_segment_with_tags(text: str):
    """
    Split ``text`` into plain runs and inline-tag runs, in document order.

    Recognized tags:
      - <speed value="120%"> ... </speed>  (the closing tag may also be written <speed/>)
      - <pause duration="300ms"/>          (self-closing)

    Returns a list of (text, options) tuples: options is {} for plain text,
    {"speed": value} for the content of a speed tag, and {"pause": duration}
    (with empty text) for a pause tag.
    """
    tag_pattern = re.compile(r"""
        (?P<pre_text>.*?)                              # text before the tag
        (?:
            <speed\s+value=["'](?P<speed>[\d.+%-]+)["']>  # speed tag opening
            (?P<speed_text>.+?)(?:</speed>|<speed\s*/>)    # speed tag content and closing
          |
            <pause\s+duration=["'](?P<pause>[\d.]+ms)["']\s*/>  # self-closing pause tag
        )
    """, re.VERBOSE | re.DOTALL)

    pieces = []
    cursor = 0
    # Each match starts where the previous one ended, because the lazy
    # pre_text group absorbs everything up to the next tag.
    for found in tag_pattern.finditer(text):
        leading = found.group("pre_text")
        if leading:
            pieces.append((leading, {}))
        speed_value = found.group("speed")
        pause_value = found.group("pause")
        if speed_value:
            pieces.append((found.group("speed_text"), {"speed": speed_value.strip()}))
        elif pause_value:
            pieces.append(("", {"pause": pause_value.strip()}))
        cursor = found.end()

    # Whatever follows the last tag (or the whole text when there is no tag).
    tail = text[cursor:]
    if tail:
        pieces.append((tail, {}))
    return pieces

def generate_ssml_for_phrase(text: str, default_speed: str, default_pause: int) -> str:
    """
    Generate an SSML string from a text segment that may include inline tags.

    Pause tags become <break> elements and speed tags become <prosody>
    elements with the tag's rate; untagged text is wrapped in a <prosody>
    using ``default_speed``. A final <break> of ``default_pause`` ms is
    appended, and the result is wrapped in a <speak> element preceded by an
    XML declaration.

    The spoken text is XML-escaped so that characters such as ``&`` or ``<``
    in a translation cannot produce malformed markup.
    """
    # Local import: this module is not imported at the top of the file.
    from xml.sax.saxutils import escape

    ssml_parts = []
    for seg_text, options in parse_segment_with_tags(text):
        if "pause" in options:
            ssml_parts.append(f'<break time="{options["pause"]}"/>')
        else:
            # Tagged text uses its own rate; plain text uses the default.
            rate = options.get("speed", default_speed)
            ssml_parts.append(f'<prosody rate="{rate}">{escape(seg_text)}</prosody>')
    full_ssml = "".join(ssml_parts) + f'<break time="{default_pause}ms"/>'
    # Prepend XML declaration and wrap in speak element.
    return f'<?xml version="1.0"?><speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="fr-FR">{full_ssml}</speak>'

# ============== TTS Functions: Edge TTS Only with Debug Logging ==============
async def robust_synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", ssml: str = None, max_retries: int = 5):
    """
    Synthesize speech using Edge TTS with retry logic.

    If ``ssml`` is provided it is passed as the text parameter (and ``rate``
    is not forwarded); otherwise ``phrase`` is synthesized at ``rate``.
    Debug messages are printed for each attempt. Between failed attempts the
    function waits ``2 ** attempt`` seconds plus up to one second of jitter;
    no wait follows the final attempt.

    Raises:
        Exception: when every attempt failed; the last underlying error is
            attached as the cause.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            if ssml is not None:
                communicate = edge_tts.Communicate(text=ssml, voice=voice)
            else:
                communicate = edge_tts.Communicate(text=phrase, voice=voice, rate=rate)
            print(f"[Debug] Attempt {attempt+1}: Synthesizing phrase using {'SSML' if ssml else 'plain text'}: '{phrase}'")
            await communicate.save(output_path)
            print(f"[Debug] Phrase synthesized successfully to {output_path}")
            return
        except Exception as e:
            last_error = e
            print(f"[Error] Attempt {attempt+1}/{max_retries} failed for phrase: '{phrase}'. Exception: {e}")
            # Only back off when another attempt will actually follow.
            if attempt + 1 < max_retries:
                wait_time = 2 ** attempt + random.uniform(0, 1)
                print(f"[Debug] Retrying in {wait_time:.2f} seconds...")
                await asyncio.sleep(wait_time)
    raise Exception(f"Failed to synthesize phrase after {max_retries} attempts: {phrase}") from last_error

async def synthesize_phrase(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", ssml: str = None):
    """Synthesize ``phrase`` (or ``ssml`` when given) via ``robust_synthesize_phrase``."""
    await robust_synthesize_phrase(
        phrase=phrase,
        output_path=output_path,
        voice=voice,
        rate=rate,
        ssml=ssml,
    )

# Kept under the older name so existing call sites continue to work.
async def synthesize_phrase_edge_hybrid(phrase: str, output_path: str, voice: str = "fr-FR-DeniseNeural", rate: str = "+0%", ssml: str = None):
    """Alias of ``synthesize_phrase``; forwards every argument unchanged."""
    await synthesize_phrase(
        phrase=phrase,
        output_path=output_path,
        voice=voice,
        rate=rate,
        ssml=ssml,
    )

def change_playback_speed(sound, speed=1.0):
    """Return ``sound`` re-timed by the factor ``speed``.

    The raw samples are re-labelled with a frame rate scaled by ``speed``,
    then the result is converted back to the original frame rate.
    """
    native_rate = sound.frame_rate
    overrides = {"frame_rate": int(native_rate * speed)}
    return sound._spawn(sound.raw_data, overrides=overrides).set_frame_rate(native_rate)

# ============== Updated Async Audio Generation Function ==============
async def async_generate_translated_audio_with_sync_using_review(subtitle_source_path, output_audio_path, debug_log_file, review_file_path):
    """
    Build one audio track from grouped subtitles, applying review-file overrides.

    Steps, per subtitle group returned by generate_translation_review_file:
      1. Pick the final translation / voice speed / silence duration from the
         parsed review overrides (matched by position); fall back to the
         group's original text, "+0%" and 100.0 when no override exists.
      2. Build SSML for the text and synthesize it to a temporary MP3.
      3. Load the MP3, fit it to the group's duration with
         adjust_audio_duration, pad the combined track with silence up to the
         group's start time, and append the segment.
    Groups whose synthesis or loading fails are skipped with a warning.

    Args:
        subtitle_source_path: Subtitle file passed to generate_translation_review_file.
        output_audio_path: Destination of the combined audio, exported as WAV.
        debug_log_file: Path of the UTF-8 text log describing each appended segment.
        review_file_path: Review file that is (re)generated and then parsed for overrides.

    Returns:
        output_audio_path, unchanged.
    """
    groups = generate_translation_review_file(subtitle_source_path, review_file_path)
    overrides = parse_review_overrides(review_file_path)
    combined_audio = AudioSegment.silent(duration=0)
    debug_lines = []

    for idx, group in enumerate(groups):
        # Subtitle times expose milliseconds via `.ordinal`; work in seconds here.
        group_start = group[0].start.ordinal / 1000
        group_end = group[-1].end.ordinal / 1000
        target_duration = group_end - group_start
        original_text = " ".join(sub.text for sub in group)

        # Overrides are matched to groups by position; missing entries use defaults.
        if idx < len(overrides):
            override = overrides[idx]
            final_translation = override["final_translation"]
            seg_voice_speed = override["voice_speed"]
            seg_silence_duration = override["silence_duration"]
        else:
            final_translation = original_text
            seg_voice_speed = "+0%"
            seg_silence_duration = 100.0

        print(f"[Debug] Segment {idx+1} final translation: {final_translation}")
        # Generate SSML including inline tags.
        ssml = generate_ssml_for_phrase(final_translation, default_speed=seg_voice_speed, default_pause=seg_silence_duration)
        print(f"[Debug] Generated SSML for segment {idx+1}: {ssml}")
        temp_segment_path = os.path.join(tempfile.gettempdir(), f"temp_segment_{idx}.mp3")

        try:
            try:
                await synthesize_phrase_edge_hybrid(final_translation, temp_segment_path, voice="fr-FR-DeniseNeural", ssml=ssml)
            except Exception as e:
                print(f"[Warning] Synthesis failed for segment {idx+1}: {e}. Skipping this segment.")
                continue

            try:
                segment_audio = AudioSegment.from_mp3(temp_segment_path)
            except Exception as e:
                print(f"[Warning] Unable to load audio from {temp_segment_path}: {e}. Skipping this segment.")
                continue

            segment_audio = adjust_audio_duration(segment_audio, target_duration)
        finally:
            # Remove the temp file on every path (success, skip, or error) so a
            # failed synthesis or adjustment does not leave stray files behind.
            if os.path.exists(temp_segment_path):
                os.remove(temp_segment_path)

        # Pad with silence up to the group's start time. Clamp at zero so a track
        # that already extends past the start never requests a negative duration.
        padding_ms = max(0, int(group_start * 1000) - len(combined_audio))
        combined_audio += AudioSegment.silent(duration=padding_ms)
        combined_audio += segment_audio

        debug_entry = (
            f"Segment {group[0].index} (start: {group_start:.2f}s, end: {group_end:.2f}s):\n"
            f"**Original:** {original_text}\n"
            f"**Final Translation:** {final_translation}\n"
            f"**Voice Speed (segment):** {seg_voice_speed}\n"
            f"**Silence Duration (segment):** {seg_silence_duration} ms\n"
            f"**Generated SSML:** {ssml}\n"
            f"**Target Duration:** {target_duration:.2f}s\n"
            "---\n"
        )
        debug_lines.append(debug_entry)

    with open(debug_log_file, "w", encoding="utf-8") as debug_file:
        debug_file.write("Translation Debug Log\n\n")
        debug_file.writelines(debug_lines)
    combined_audio.export(output_audio_path, format="wav")
    print(f"✅ Translated audio saved to: {output_audio_path}")
    print(f"📝 Debug log saved to: {debug_log_file}")
    return output_audio_path

# ============== Merge Audio and Video Function ==============
def merge_audio_video():
    """
    Replace the input video's audio with the translated track and write the result.

    Reads the module-level names input_video, translated_audio, output_dir and
    output_video. If the translated audio is shorter than the video, a copy
    padded with trailing silence is written to "temp_full_audio.wav" inside
    output_dir and used instead. The video is encoded with libx264 / aac.

    Every clip opened here is closed once writing finishes (or fails), so the
    underlying reader resources are released instead of lingering until
    interpreter exit.
    """
    opened_clips = []
    try:
        video = VideoFileClip(input_video)
        opened_clips.append(video)
        audio = AudioFileClip(translated_audio)
        opened_clips.append(audio)

        if audio.duration < video.duration:
            # Clip durations are in seconds; pydub expects milliseconds.
            extra_silence = AudioSegment.silent(duration=(video.duration - audio.duration) * 1000)
            audio_path_temp = os.path.join(output_dir, "temp_full_audio.wav")
            audio_seg = AudioSegment.from_file(translated_audio, format="wav")
            full_audio = audio_seg + extra_silence
            full_audio.export(audio_path_temp, format="wav")
            # Keep the original clip in opened_clips so it is still closed below.
            audio = AudioFileClip(audio_path_temp)
            opened_clips.append(audio)

        final_video = video.set_audio(audio)
        final_video.write_videofile(
            output_video,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile="temp-audio.m4a",
            remove_temp=True,
            threads=4
        )
    finally:
        for clip in opened_clips:
            clip.close()

# ============== Main Asynchronous Flow ==============
async def async_main():
    """
    Run the full pipeline in order, printing a progress line before each stage.

    Stages: extract audio, transcribe it, write the English subtitle file,
    generate the translated audio (with review overrides), then merge audio and
    video. File locations come from module-level names defined elsewhere
    (subtitle_file_en, translated_audio, debug_log_file, review_file, output_video).
    """
    print("Extracting audio...")
    extracted_audio = extract_audio()

    print("Transcribing audio...")
    # transcribe returns a (language, segments) pair; only the segments are used below.
    _detected_language, transcript_segments = transcribe(extracted_audio)

    print("Generating English subtitles...")
    generate_subtitle_file(transcript_segments, subtitle_file_en)

    print("Generating French audio with synchronization and manual overrides...")
    await async_generate_translated_audio_with_sync_using_review(
        subtitle_file_en,
        translated_audio,
        debug_log_file,
        review_file,
    )

    print("Merging audio and video...")
    merge_audio_video()

    print(f"Process completed! Output video: {output_video}")

# Script entry point: run the asynchronous pipeline to completion when this
# file is executed directly (not when it is imported as a module).
if __name__ == "__main__":
    asyncio.run(async_main())


✅ ffmpeg found at: C:\ffmpeg\bin\ffmpeg.EXE
Extracting audio...
Transcribing audio...
Detected language: en
Generating English subtitles...
Generating French audio with synchronization and manual overrides...
Review file created at: 4.2.2_Flux de navigation_Avr_08_Latest_run_20250416_084328\translation_review.txt
Please review and update the final translations as needed.
Parsed review file overrides:
  Segment 1 final translation: Dans cette démo, nous explorerons comment créer un flux de navigation, comment nous pouvons le personnaliser et attribuer des autorisations aux groupes d'utilisateurs.
  Segment 2 final translation: Le flux de navigation améliore l'expérience utilisateur avec des voies structurées intuitives pour la navigation sans effort à travers les modules et les tâches.
  Segment 3 final translation: Permet une petite transition entre la saisie des données, les rapports et la gestion des processus, l'optimisation de l'efficacité avec les flux de travail de planification 

                                                                     

MoviePy - Done.
Moviepy - Writing video 4.2.2_Flux de navigation_Avr_08_Latest_run_20250416_084328\4.2.2_Flux de navigation_Avr_08_Latest-french.mp4



                                                                   

Moviepy - Done !
Moviepy - video ready 4.2.2_Flux de navigation_Avr_08_Latest_run_20250416_084328\4.2.2_Flux de navigation_Avr_08_Latest-french.mp4
Process completed! Output video: 4.2.2_Flux de navigation_Avr_08_Latest_run_20250416_084328\4.2.2_Flux de navigation_Avr_08_Latest-french.mp4
