In [1]:
from moviepy import VideoFileClip

def extract_audio(video_file, audio_file):
    """Extract the audio track of a video and write it to an audio file.

    Args:
        video_file: Path of the video to read.
        audio_file: Path of the audio file to write; it is passed unchanged
            to the clip's ``write_audiofile``.

    Raises:
        ValueError: If the opened clip exposes no audio track
            (``video.audio`` is ``None``).
    """
    video = VideoFileClip(video_file)
    try:
        audio = video.audio
        if audio is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise ValueError(f"Video {video_file} tidak memiliki trek audio.")
        try:
            audio.write_audiofile(audio_file)
        finally:
            # Release the audio reader even when writing fails.
            audio.close()
    finally:
        # Always release the video reader, whatever happened above.
        video.close()

# Pull the audio track out of the sample video and save it as a WAV file.
extract_audio(video_file='videoplayback.mp4', audio_file='p.wav')

MoviePy - Writing audio in p.wav


                                                                    

MoviePy - Done.




In [2]:
import speech_recognition as sr
import os
from googletrans import Translator

def read_reference_text(file_path):
    """Read the reference text from a UTF-8 encoded file.

    Args:
        file_path: Path of the text file holding the reference transcript.

    Returns:
        The file content with leading and trailing whitespace stripped, or
        ``None`` (after printing a message) when ``file_path`` is not an
        existing regular file.
    """
    # isfile rather than exists: a directory path must take the "not found"
    # branch instead of crashing inside open().
    if not os.path.isfile(file_path):
        print(f"File {file_path} tidak ditemukan.")
        return None

    with open(file_path, "r", encoding="utf-8") as file:
        return file.read().strip()

In [3]:
def calculate_wer(detected_text, reference_text):
    """Compute the Word Error Rate (WER) of a transcript against a reference.

    Both texts are lower-cased and split on whitespace; the WER is the
    word-level edit (Levenshtein) distance divided by the number of
    reference words.

    Args:
        detected_text: The recognized text.
        reference_text: The ground-truth text.

    Returns:
        The WER as a percentage. An empty reference yields 0.
    """
    hypothesis = detected_text.lower().split()
    reference = reference_text.lower().split()
    ref_len = len(reference)
    hyp_len = len(hypothesis)

    # With no reference words the rate is defined as 0 here.
    if ref_len == 0:
        return 0

    # Rolling edit-distance rows: `previous` is the row for the reference
    # prefix one word shorter, `current` the row being filled in.
    previous = list(range(hyp_len + 1))
    for row_index, ref_word in enumerate(reference, start=1):
        current = [row_index]
        for col_index, hyp_word in enumerate(hypothesis, start=1):
            if ref_word == hyp_word:
                cost = previous[col_index - 1]
            else:
                cost = 1 + min(
                    previous[col_index],       # drop a reference word
                    current[col_index - 1],    # extra detected word
                    previous[col_index - 1],   # substitute one word
                )
            current.append(cost)
        previous = current

    error_rate = previous[hyp_len] / ref_len
    return error_rate * 100  # WER as a percentage

In [4]:
def transcribe_audio(file_path, reference_text_path):
    """Transcribe an audio file and score the result against a reference text.

    Args:
        file_path: Path of the audio file to transcribe.
        reference_text_path: Path of the text file holding the reference
            transcript used for the WER computation.

    Returns:
        A ``(detected_text, wer)`` tuple. On any failure (missing audio file,
        missing reference file, recognition or service error) a message is
        printed and ``(None, None)`` is returned, so callers can always
        unpack the result into two names.
    """
    # Check that the audio file exists
    if not os.path.exists(file_path):
        print(f"File {file_path} tidak ditemukan.")
        return None, None

    # Read the reference text
    reference_text = read_reference_text(reference_text_path)
    if reference_text is None:
        return None, None

    # Create the recognizer instance
    recognizer = sr.Recognizer()

    try:
        # Open the audio file
        with sr.AudioFile(file_path) as source:
            # Record the audio from the file
            audio = recognizer.record(source)

        # Use the Google Web Speech API to convert the audio to text
        detected_text = recognizer.recognize_google(audio, language="id-ID")

        # Compute the Word Error Rate (WER)
        wer = calculate_wer(detected_text, reference_text)

        return detected_text, wer

    except sr.UnknownValueError:
        # The speech could not be recognized
        print("Google Speech Recognition tidak bisa mengenali suara.")
    except sr.RequestError as e:
        # There was a problem with the Google Speech Recognition service
        print(f"Error dengan layanan Google Speech Recognition; {e}")
    except Exception as e:
        # Any other error is reported instead of propagating (best effort)
        print(f"Terjadi kesalahan: {e}")

    # Every handled error ends here: keep the two-element return shape.
    return None, None

In [5]:
def translate_text(text, target_language=''):
    """Translate text into another language.

    Args:
        text: The text to translate.
        target_language: Language code passed to the translator as ``dest``.
            An empty value (the default) falls back to ``'en'``, because an
            empty destination code cannot name a language.

    Returns:
        The translated text, or ``None`` (after printing a message) when the
        translation fails.
    """
    # Resolve the effective destination so the default call is usable.
    dest_language = target_language or 'en'
    translator = Translator()

    try:
        # Translate the text into the target language
        translation = translator.translate(text, dest=dest_language)
        print(f"Teks setelah diterjemahkan ({dest_language}): {translation.text}")
        return translation.text
    except Exception as e:
        print(f"Terjadi kesalahan saat menerjemahkan: {e}")
        return None

In [6]:
# Example usage
file_audio = "p.wav"  # Replace with the path to your WAV file
reference_text_file = "detected_text.txt"  # Path to the reference text file

# Transcribe and compute the WER.
# Guard against a failed transcription, which may be signalled by None
# instead of a (text, wer) pair; unpacking None directly would raise TypeError.
transcription_result = transcribe_audio(file_audio, reference_text_file)
if transcription_result is None:
    detected_text, wer = None, None
else:
    detected_text, wer = transcription_result

# Once transcription succeeded, report the accuracy and translate the text
if detected_text:
    print(f"Teks yang dikenali: {detected_text}")
    print(f"Word Error Rate (WER): {wer:.2f}%")

    # For example, translate into English
    target_language = 'en'
    translated_text = translate_text(detected_text, target_language)

Teks yang dikenali: hari ini kita ngobrol bersama Yono Bakrie Yono Bakrie Halo Bang Rani bikin podcast itu ada yang se-jam ada yang kadang setengah jam Nah aku tuh kepikiran pengen bikin podcast paling pendek di dunia ya Allah lucu kali ya Allah pendek
Word Error Rate (WER): 46.27%
Teks setelah diterjemahkan (en): Today we chat with Yono Bakrie Yono Bakrie Hello Bang Rani makes the podcast that is there as long as there are some sometimes half an hour.
