**Lip synchronization dubbing from albanian to [language]**

Imports

In [2]:
import os
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.api_core.client_options import ClientOptions
from google.cloud import speech_v2
from google.api_core.exceptions import NotFound

Google cloud data

In [3]:
PROJECT_ID = "890676014334"
LOCATION = "europe-west4"

# Chirp 2 is only available in certain locations
client_options_var = ClientOptions(api_endpoint="europe-west4-speech.googleapis.com")

# Initialize the client
client = speech_v2.SpeechClient(client_options=client_options_var)
recogniser = f"projects/{PROJECT_ID}/locations/{LOCATION}/recognizers/albanian-recogniser"

Audio extraction

In [None]:
import subprocess
import os
def extract_audio_ffmpeg(video_file, output_folder):
    try:
        # Create the output folder if it doesn't exist.
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        # Construct the output audio file name.
        video_file_name = os.path.splitext(os.path.basename(video_file))[0]
        audio_file_path = os.path.join(output_folder, f"{video_file_name}.wav")
        """
        # Build the FFmpeg command.
        command = [
            "ffmpeg",
            "-i",
            video_file,
            "-vn", # No video
            "-acodec",
            "pcm_s16le",  # Lossless, signed 16-bit PCM (linear16)
            "-ar",
            "16000",      # Sampling rate 16kHz
            audio_file_path,
        ]
        """
        command = [
            "ffmpeg",
            "-i",
            video_file,
            "-vn", # No video
            "-acodec",
            "flac",  # Lossless, signed 16-bit PCM (linear16)
            "-ac",   # Number of audio channels
            "1",     # Mono
            "-sample_fmt",
            "s16",  # Signed 16-bit PCM
            "-ar",
            "16000",      # Sampling rate 16kHz
            audio_file_path,
        ]
        
        
        # Execute the FFmpeg command using subprocess.
        subprocess.run(command, check=True, capture_output=True) #capture_output=True added

        print(f"Audio extracted from '{video_file}' and saved as '{audio_file_path}'")

    except subprocess.CalledProcessError as e:
        print(f"Error processing {video_file}: FFmpeg error: {e.stderr.decode()}") #e.stderr added
    except Exception as e:
        print(f"Error processing {video_file}: {e}")

def process_videos_in_folder_ffmpeg(video_folder, output_folder):
    # Get a list of all files in the video folder.
    for filename in os.listdir(video_folder):
        # Construct the full path to the file.
        video_file = os.path.join(video_folder, filename)

        # Check if the file is a video file.
        if filename.lower().endswith(('.mp4', '.avi', '.mov', '.mkv', '.wmv')):
            # Extract the audio from the video file using FFmpeg.
            extract_audio_ffmpeg(video_file, output_folder)
        else:
            print(f"Skipping non-video file: {video_file}")


video_folder = "videos"
output_folder = "extracted_audios_flac_mono"

# Process all video files in the specified folder using FFmpeg.
process_videos_in_folder_ffmpeg(video_folder, output_folder)


Transcribing the videos

In [5]:
import base64
import subprocess

def transcribe_audio_file(audio_path, client, recogniser, base_name):
    # Read file as bytes to send to Google API
    try:
        with open(audio_path, "rb") as f:
            audio_content = f.read()
    except Exception as e:
        print(f"Error reading file {audio_path}: {e}")
        return
    
    try:
        config = speech_v2.RecognitionConfig(
            auto_decoding_config=speech_v2.AutoDetectDecodingConfig(),
            language_codes=["sq-AL"], 
            model="chirp_2"
            #features=speech_v2.RecognitionFeatures(
            #    enable_word_time_offsets=False,
            #),
        )
                
        request = speech_v2.RecognizeRequest(
            recognizer=recogniser,
            config=config,
            content=audio_content
        )
                
        # Transcribes the audio into text
        response = client.recognize(request=request)
        
        if response.results:
           top_alternative = response.results[0].alternatives[0]
           confidence = top_alternative.confidence
           transcript = top_alternative.transcript

           with open(base_name, "a", encoding="utf-8") as f:
               f.write(f"{audio_path}:{confidence}:{transcript}\n")
        else:
            print(f"No transcription results for {audio_path}")

    except Exception as e:
        print(f"Error transcribing {audio_path}: {e}")

def process_audio_folder():    
    # Folder containing audio files
    audio_folder = "extracted_audios"  # Change this to your folder path
    
    # Base name for the transcription file
    base_name = audio_folder + "/transcription_alb.txt"
    
    # Supported audio file extensions
    audio_extensions = ['.wav', '.mp3', '.flac', '.ogg', '.m4a']
    
    # Check if folder exists
    if not os.path.isdir(audio_folder):
        print(f"Error: Folder '{audio_folder}' does not exist")
        return
    
    # Process each audio file in the folder
    for file in os.listdir(audio_folder):
        file_path = os.path.join(audio_folder, file)
        
        # Check if it's a file and has an audio extension
        if os.path.isfile(file_path) and any(file.lower().endswith(ext) for ext in audio_extensions):
            transcribe_audio_file(file_path, client, recogniser, base_name)
        else:
            print(f"Skipping non-audio file: {file}")

process_audio_folder()

Error transcribing extracted_audios\20250421_195822.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250421_201934.wav: 400 Request payload size exceeds the limit: 10485760 bytes.
Error transcribing extracted_audios\20250421_203247.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250429_115126.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250429_115317.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250429_125651.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250429_125844.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250429_130234.wav: 400 Audio can be of a maximum of 60 seconds.
Error transcribing extracted_audios\20250429_130503.wav: 400 Audio can be of a maximum of 60 seconds.


KeyboardInterrupt: 

Translating the transcription file

In [None]:
from googletrans import Translator

input_file = "transcription_alb.txt"
output_file = "transcription_eng.txt"

target_language = "en" # en (english), de (deutch), ja (japonese)
translator = Translator()

try:
    with open(input_file, 'r', encoding='utf-8') as infile, \
        open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            line = line.strip()
            if line:  # Ensure the line is not empty
                parts = line.split(':', 2)  # Split into at most 3 parts
                if len(parts) == 3:
                    file_name, confidence_score, transcription_text = parts
                    try:
                        translation = translator.translate(transcription_text, src='sq', dest=target_language)
                        translated_line = f"{file_name}:{confidence_score}:{translation.text}"
                        outfile.write(translated_line + '\n')
                    except Exception as e:
                        print(f"Error translating line: {line} - {e}")
                        outfile.write(line + '\n')  # Write the original line if translation fails
                else:
                    print(f"Skipping invalid line format: {line}")
                    outfile.write(line + '\n')  # Write the original line if format is wrong
except FileNotFoundError:
    print(f"Error: Input file '{input_file}' not found.")
except Exception as e:
    print(f"An error occurred: {e}")

Voice cloning from the transcription\own voice