**Lip synchronization dubbing from albanian to [language]**

Imports

In [13]:
import os
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.api_core.client_options import ClientOptions
from google.cloud import speech_v2
from google.api_core.exceptions import NotFound

Google cloud data

In [14]:
PROJECT_ID = "890676014334"
LOCATION = "europe-west4"

# Chirp 2 is only available in certain locations
client_options_var = ClientOptions(api_endpoint="europe-west4-speech.googleapis.com")

# Initialize the client
client = speech_v2.SpeechClient(client_options=client_options_var)
recogniser = f"projects/{PROJECT_ID}/locations/{LOCATION}/recognizers/albanian-recogniser"

Audio extraction

In [None]:
import subprocess

def extract_audio_ffmpeg(video_file, output_folder):
    """
    Extracts audio from a video file using FFmpeg.

    Args:
        video_file (str): The path to the video file.
        output_folder (str): The path to the folder where the audio file will be saved.
    """
    try:
        # 1. Create the output folder if it doesn't exist.
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        # 2. Construct the output audio file name.
        video_file_name = os.path.splitext(os.path.basename(video_file))[0]
        audio_file_path = os.path.join(output_folder, f"{video_file_name}.mp3")

        # 3. Build the FFmpeg command.
        # need to make the audio 16000hz or something like that but we keep it as is for now
        command = [
            "ffmpeg",
            "-i",
            video_file,
            "-vn",
            "-acodec",
            "libmp3lame",  # Or "aac", "pcm_s16le", etc.
            audio_file_path,
        ]

        # 4. Execute the FFmpeg command using subprocess.
        subprocess.run(command, check=True, capture_output=True) #capture_output=True added

        print(f"Audio extracted from '{video_file}' and saved as '{audio_file_path}'")

    except subprocess.CalledProcessError as e:
        print(f"Error processing {video_file}: FFmpeg error: {e.stderr.decode()}") #e.stderr added
    except Exception as e:
        print(f"Error processing {video_file}: {e}")

def process_videos_in_folder_ffmpeg(video_folder, output_folder):
    """
    Processes all video files in a folder, extracting the audio from each using FFmpeg.

    Args:
        video_folder (str): The path to the folder containing the video files.
        output_folder (str): The path to the folder where the audio files will be saved.
    """
    # 1. Get a list of all files in the video folder.
    for filename in os.listdir(video_folder):
        # 2. Construct the full path to the file.
        video_file = os.path.join(video_folder, filename)

        # 3. Check if the file is a video file.
        if filename.lower().endswith(('.mp4', '.avi', '.mov', '.mkv', '.wmv')):
            # 4. Extract the audio from the video file using FFmpeg.
            extract_audio_ffmpeg(video_file, output_folder)
        else:
            print(f"Skipping non-video file: {video_file}")


video_folder = "videos"  # Replace with the path to your video folder
output_folder = "audio_output"  # Different output folder for FFmpeg

# Process all video files in the specified folder using FFmpeg.
process_videos_in_folder_ffmpeg(video_folder, output_folder)


Transcribing the videos

In [12]:
from moviepy.editor import VideoFileClip
import base64
# need to change the imports here and improve a lot of things, audio file specs need to be checked
# use ffmpeg more as a tool for audio sampling etc

video_file = "videos\\20250403_161711.mp4" # CHANGE VIDEO FILE PATH

audio_path = "temp_audio.wav"

try: 
    video_clip = VideoFileClip(video_file)
    video_clip.audio.write_audiofile(audio_path, logger=None)
    video_clip.close()
    print(f"Audio extracted")
except:
    print(f"Error here: {e}")
 
# SAMPLE ROW
# file_name:confidence_score:transcription
# video_file_1.mp4:0.72:Po perse more djale e bere kete gje jo te ndershme.

# Base name for the transcription file
base_name = "transcription_alb.txt"

"""
    def encoded_audio(audio_file):
    with open(audio_file, "rb") as f:
        encoded_content = base64.b64encode(f.read())
    return encoded_content
"""
# Read file as bytes to send to google API
with open(audio_path, "rb") as f:
    audio_content = f.read()    

try:
    config = speech_v2.RecognitionConfig(
        auto_decoding_config=speech_v2.AutoDetectDecodingConfig(),
        language_codes=["sq-AL"], 
        model="chirp_2",
        features=cloud_speech.RecognitionFeatures(
            enable_word_time_offsets=False, # this is not tested and good chance it does not work
        ),
    )
            
    request = speech_v2.RecognizeRequest(
        recognizer=recogniser,
        config=config,
        content=audio_content
    )
            
    # Transcribes the audio into text
    # A response sample can be seen in responseResults.txt to see the format of the data
    response = client.recognize(request=request)
    
    if response.results:
       top_alternative = response.results[0].alternatives[0]
       confidence = top_alternative.confidence
       transcript = top_alternative.transcript

       with open(base_name, "a", encoding="utf-8") as f:
           f.write(f"{video_file}:{confidence}:{transcript}\n")
       print(f"Appended to {base_name}: {video_file}:{confidence}:{transcript}")

except Exception as e:
    print(f"Error: {e}")    
    print("\nRecognizer not found, bad name or something...")

# Remove the temp audio file after extracting it from the video
os.remove(audio_path)


Audio extracted
Error: 503 GOAWAY received; Error code: 0; Debug Text: session_timed_out

Recognizer not found, bad name or something...


Translating the transcription file

In [None]:
from googletrans import Translator

input_file = "transcription_alb.txt"
output_file = "transcription_eng.txt"

target_language = "en" # en (english), de (deutch), ja (japonese)
translator = Translator()

try:
    with open(input_file, 'r', encoding='utf-8') as infile, \
        open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            line = line.strip()
            if line:  # Ensure the line is not empty
                parts = line.split(':', 2)  # Split into at most 3 parts
                if len(parts) == 3:
                    file_name, confidence_score, transcription_text = parts
                    try:
                        translation = translator.translate(transcription_text, src='sq', dest=target_language)
                        translated_line = f"{file_name}:{confidence_score}:{translation.text}"
                        outfile.write(translated_line + '\n')
                    except Exception as e:
                        print(f"Error translating line: {line} - {e}")
                        outfile.write(line + '\n')  # Write the original line if translation fails
                else:
                    print(f"Skipping invalid line format: {line}")
                    outfile.write(line + '\n')  # Write the original line if format is wrong
except FileNotFoundError:
    print(f"Error: Input file '{input_file}' not found.")
except Exception as e:
    print(f"An error occurred: {e}")

Voice cloning from the transcription\own voice