<a href="https://colab.research.google.com/github/tafsirnetlifyapp/quran/blob/main/flashquransrt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title 1. Upload File
# @markdown Klik untuk memproses

!pip install -U faster-whisper moviepy srt
import os, urllib.request
from faster_whisper import WhisperModel
import srt
from datetime import timedelta
from google.colab import files
import ipywidgets as ipw
from IPython.display import display, clear_output

# --- UI: choose between uploading a file and pasting a link ---
upload_btn = ipw.FileUpload(multiple=False, accept=".mp3,.wav,.m4a")
link_input = ipw.Text(description="Link:", placeholder="https://...", value="")
process_btn = ipw.Button(button_style="success", description="🎧 Transkripsi Sekarang")
output_box = ipw.Output()

# Each audio source gets its own labelled column; the two sit side by side.
upload_column = ipw.VBox([ipw.Label("📤 Upload File:"), upload_btn])
link_column = ipw.VBox([ipw.Label("🌐 Link Audio:"), link_input])

display(ipw.HTML("<h4>🔊 Pilih sumber audio:</h4>"))
display(ipw.HBox([upload_column, link_column]))
display(process_btn, output_box)

def transcribe_audio(audio_path):
    """Transcribe an Arabic audio file into ``output.srt`` and download it.

    Loads the ``large-v3`` Whisper model on CUDA with float16, transcribes
    ``audio_path`` with the VAD filter enabled, writes one subtitle per
    returned segment to ``output.srt`` in the working directory and then
    calls ``files.download`` on that file.

    Args:
        audio_path: Path of the audio file to transcribe.
    """
    print("⚙️ Memuat model Whisper (large-v3)...")
    whisper = WhisperModel("large-v3", device="cuda", compute_type="float16")

    print("🧠 Melakukan transkripsi...")
    vad_options = {"min_silence_duration_ms": 300}
    segment_iter, _info = whisper.transcribe(
        audio_path,
        language="ar",
        beam_size=5,
        vad_filter=True,
        vad_parameters=vad_options,
    )
    # Materialise the returned segments so their number can be reported.
    segments = list(segment_iter)

    print(f"📋 Total segmen: {len(segments)}")

    # Subtitle indices are 1-based; times are converted from seconds.
    subtitles = [
        srt.Subtitle(
            index=number,
            start=timedelta(seconds=segment.start),
            end=timedelta(seconds=segment.end),
            content=segment.text.strip(),
        )
        for number, segment in enumerate(segments, start=1)
    ]

    srt_filename = "output.srt"
    with open(srt_filename, "w", encoding="utf-8") as srt_file:
        srt_file.write(srt.compose(subtitles))

    files.download(srt_filename)
    print("✅ Selesai. Subtitle disimpan sebagai output.srt")

def on_process_clicked(_):
    """Handle a click on the process button: obtain the audio, then transcribe it.

    An uploaded file is checked first; the link field is only used when no
    file has been uploaded. If neither is provided a hint is printed. All
    output goes to ``output_box``.

    Only the download itself is guarded by ``try``: a failure inside
    ``transcribe_audio`` is no longer reported as a download failure and
    instead propagates, exactly as it does for an uploaded file.

    Args:
        _: The clicked button (ignored).
    """
    with output_box:
        clear_output()
        source_url = link_input.value.strip()

        # Upload
        if upload_btn.value:
            audio_file = next(iter(upload_btn.value.values()))
            # The extension is taken from the MIME subtype, e.g. "audio/wav" -> "wav".
            audio_ext = audio_file['metadata']['type'].split("/")[-1]
            audio_path = f"uploaded_audio.{audio_ext}"
            with open(audio_path, "wb") as f:
                f.write(audio_file['content'])
            print(f"📥 File audio diunggah: {audio_path}")

        # Link
        elif source_url:
            audio_path = "linked_audio.mp3"
            print("🌐 Mengunduh audio...")
            try:
                urllib.request.urlretrieve(source_url, audio_path)
            except Exception as e:
                # UI boundary: report any download problem and stop here.
                print(f"❌ Gagal mengunduh: {e}")
                return
            print(f"📥 Audio diunduh sebagai: {audio_path}")

        else:
            print("⚠️ Harap unggah file atau masukkan link audio terlebih dahulu.")
            return

        transcribe_audio(audio_path)

# Run on_process_clicked every time the transcription button is clicked.
process_btn.on_click(on_process_clicked)


In [None]:
# @title 2. Transkrip Audio
# @markdown Klik untuk memproses
import json, urllib.request, srt, re
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- Fetch the Quran JSON ---
quran_url = "https://raw.githubusercontent.com/tafsirnetlifyapp/quran/refs/heads/main/quran2.json"
# Use a context manager so the HTTP response is closed once the JSON is parsed.
with urllib.request.urlopen(quran_url) as quran_response:
    quran_data = json.load(quran_response)

# --- Surah names and number of ayat per surah ---
# Both lists are indexed by surah number minus one (index 0 is surah 1).
surah_names = ["Al-Fatihah", "Al-Baqarah", "Ali 'Imran", "An-Nisa'", "Al-Ma'idah", "Al-An'am", "Al-A'raf", "Al-Anfal", "At-Tawbah", "Yunus", "Hud", "Yusuf", "Ar-Ra'd", "Ibrahim", "Al-Hijr", "An-Nahl", "Al-Isra'", "Al-Kahf", "Maryam", "Ta-Ha", "Al-Anbiya'", "Al-Hajj", "Al-Mu'minun", "An-Nur", "Al-Furqan", "Ash-Shu'ara", "An-Naml", "Al-Qasas", "Al-'Ankabut", "Ar-Rum", "Luqman", "As-Sajda", "Al-Ahzab", "Saba'", "Fatir", "Ya-Sin", "As-Saffat", "Sad", "Az-Zumar", "Ghafir", "Fussilat", "Ash-Shura", "Az-Zukhruf", "Ad-Dukhan", "Al-Jathiya", "Al-Ahqaf", "Muhammad", "Al-Fath", "Al-Hujurat", "Qaf", "Adh-Dhariyat", "At-Tur", "An-Najm", "Al-Qamar", "Ar-Rahman", "Al-Waqi'a", "Al-Hadid", "Al-Mujadila", "Al-Hashr", "Al-Mumtahana", "As-Saff", "Al-Jumu'a", "Al-Munafiqun", "At-Taghabun", "At-Talaq", "At-Tahrim", "Al-Mulk", "Al-Qalam", "Al-Haqqa", "Al-Ma'arij", "Nuh", "Al-Jinn", "Al-Muzzammil", "Al-Muddaththir", "Al-Qiyama", "Al-Insan", "Al-Mursalat", "An-Naba'", "An-Nazi'at", "Abasa", "At-Takwir", "Al-Infitar", "Al-Mutaffifin", "Al-Inshiqaq", "Al-Buruj", "At-Tariq", "Al-A'la", "Al-Ghashiyah", "Al-Fajr", "Al-Balad", "Ash-Shams", "Al-Lail", "Ad-Duhaa", "Ash-Sharh", "At-Tin", "Al-'Alaq", "Al-Qadr", "Al-Bayyina", "Az-Zalzalah", "Al-'Adiyat", "Al-Qari'a", "At-Takathur", "Al-Asr", "Al-Humazah", "Al-Fil", "Quraysh", "Al-Ma'un", "Al-Kawthar", "Al-Kafirun", "An-Nasr", "Al-Masad", "Al-Ikhlas", "Al-Falaq", "An-Nas"]
jumlah_ayat = [7,286,200,176,120,165,206,75,129,109,123,111,43,52,99,128,111,110,98,135,112,78,118,64,77,227,93,88,69,60,34,30,73,54,45,83,182,88,75,85,54,53,89,59,37,35,38,29,18,45,60,49,62,55,78,96,29,22,24,13,14,11,11,18,12,12,30,52,52,44,28,28,20,56,40,31,50,40,46,42,29,19,36,25,22,17,19,26,30,20,15,21,11,8,8,19,5,8,8,11,11,8,3,9,5,4,7,3,6,3,5,4,5,6]

# --- Normalisasi & Ekstraksi ---
def normalize_arabic(w):
    """Return *w* with the alef variants أ, إ and آ replaced by plain alef (ا)."""
    plain_alef = "ا"
    for variant in ("أ", "إ", "آ"):
        w = w.replace(variant, plain_alef)
    return w
def extract_words(text):
    """Split *text* into normalised words.

    Every character that is neither a word character nor whitespace is
    removed, as are digits and underscores; the remainder is split on
    whitespace and each word is passed through ``normalize_arabic``.
    """
    stripped = re.sub(r"[^\w\s]|[\d_]", "", text)
    return list(map(normalize_arabic, stripped.split()))

# --- UI Builder ---
# Vertical container whose children are the per-entry boxes built by make_entry.
container = widgets.VBox()
# One control tuple per entry; appended by add_entry and iterated by do_analysis.
entry_controls = []

def make_entry():
    """Build the widgets for one surah / ayah-range entry.

    Returns:
        A ``(box, controls)`` pair. ``box`` is the VBox to display and
        ``controls`` is the tuple
        ``(surah_dd, ayat_from, ayat_to, taawudz_cb, basmalah_cb)``.
    """
    surah_dd = widgets.Dropdown(
        options=[(f"{i+1:03d} - {surah_names[i]}", i+1) for i in range(114)],
        description="Surah",
        layout=widgets.Layout(width="230px")  # narrowed
    )
    ayat_from = widgets.Dropdown(
        description="Dari",
        layout=widgets.Layout(width="155px")  # small
    )
    ayat_to = widgets.Dropdown(
        description="Sampai",
        layout=widgets.Layout(width="155px")  # small
    )
    taawudz_cb = widgets.Checkbox(description="Taawudz")
    basmalah_cb = widgets.Checkbox(description="Basmalah")
    btn_hapus = widgets.Button(
        description="Hapus",
        button_style="danger",
        layout=widgets.Layout(width="80px")  # made smaller
    )
    controls = (surah_dd, ayat_from, ayat_to, taawudz_cb, basmalah_cb)

    def update(change):
        # Refill both ayah dropdowns for the newly selected surah and
        # default the range to the whole surah.
        idx = change["new"] - 1
        ayat_from.options = list(range(1, jumlah_ayat[idx]+1))
        ayat_to.options = list(range(1, jumlah_ayat[idx]+1))
        ayat_from.value = 1
        ayat_to.value = jumlah_ayat[idx]
    surah_dd.observe(update, names="value")
    # Initialise from the surah that is actually selected, so the ayah range
    # matches the surah shown in the dropdown (it used to be hard-coded to 98).
    update({"new": surah_dd.value})

    def hapus_click(b):
        container.children = [child for child in container.children if child is not box]
        # Also forget this entry's controls; otherwise do_analysis would still
        # include an entry that is no longer visible. Mutate in place so every
        # reference to the shared list sees the removal.
        entry_controls[:] = [ctrl for ctrl in entry_controls if ctrl is not controls]
    btn_hapus.on_click(hapus_click)

    box = widgets.VBox([
        widgets.HBox([surah_dd, btn_hapus]),
        widgets.HBox([ayat_from, ayat_to]),
        widgets.VBox([taawudz_cb, basmalah_cb])
    ], layout=widgets.Layout(border='1px solid #ccc', padding='10px', margin='5px 0'))

    return box, controls

def add_entry(_):
    """Create a new entry, record its controls and show its box.

    Args:
        _: The clicked button, or None when called directly (ignored).
    """
    entry_box, entry_ctrl = make_entry()
    entry_controls.append(entry_ctrl)
    container.children = (*container.children, entry_box)

# Output area for the analysis log.
# NOTE(review): this rebinds the module-level name `output_box`, which
# on_process_clicked also looks up when it runs. If both cells share one
# namespace, its messages will land here afterwards; confirm this is intended.
output_box = widgets.Output()

add_btn = widgets.Button(button_style="info", description="➕ Tambah Entri")
analyze_btn = widgets.Button(button_style="success", description="🔍 Analisis & Generate")
add_btn.on_click(add_entry)

# Start with one entry already present.
add_entry(None)

# --- Analyse and rewrite the SRT ---
def do_analysis(_):
    """Compare the transcribed SRT with the selected Quran text and rewrite it.

    Steps, all reported inside ``output_box``:

    1. Read and parse ``output.srt``.
    2. Build the reference word list from every entry in ``entry_controls``:
       the optional ta'awwudh and basmalah words, then the words of each ayah
       in the selected range, looked up in ``quran_data`` under the key
       ``"<surah>:<ayah>"`` and split on ``"|"``.
    3. Walk the reference words and the SRT words in parallel, logging
       repeated/skipped words and mismatches (diagnostic only).
    4. Write ``output_updated.srt``, where each subtitle's text is replaced by
       the next ``n`` reference words, ``n`` being that subtitle's own word
       count.

    Args:
        _: The clicked button (ignored).
    """
    with output_box:
        clear_output()
        try:
            with open("output.srt", "r", encoding="utf-8") as f:
                srt_entries = list(srt.parse(f.read()))
        except FileNotFoundError:
            print("❌ File output.srt tidak ditemukan.")
            return
        except (OSError, UnicodeDecodeError, srt.SRTParseError) as e:
            # A present-but-unreadable or malformed file is a different
            # problem from a missing one; say so instead of "not found".
            print(f"❌ Gagal membaca output.srt: {e}")
            return

        quran_words = []
        for surah_dd, from_dd, to_dd, taawudz_cb, basmalah_cb in entry_controls:
            if taawudz_cb.value:
                quran_words += ["اعوذ", "بالله", "من", "الشيطان", "الرجيم"]
            if basmalah_cb.value:
                quran_words += ["بسم", "الله", "الرحمن", "الرحيم"]
            for ayat in range(from_dd.value, to_dd.value + 1):
                key = f"{surah_dd.value}:{ayat}"
                if key in quran_data:
                    quran_words += [w.strip() for w in quran_data[key].split("|") if w.strip()]
                else:
                    print(f"⚠️ Ayat tidak ditemukan: {key}")

        print(f"📦 Jumlah kata dari Quran: {len(quran_words)}")

        # Extract each subtitle's words once; reused for the flat word list
        # below and for the per-subtitle word counts when rewriting.
        words_per_entry = [extract_words(entry.content) for entry in srt_entries]
        srt_words = [w for words in words_per_entry for w in words]
        print(f"📄 Jumlah kata dari SRT: {len(srt_words)}")

        # Normalise the reference words once instead of on every comparison.
        quran_norm = [normalize_arabic(w) for w in quran_words]

        # --- Matching and repetition detection ---
        qi = si = 0
        mismatches = []
        repeat_log = []

        while qi < len(quran_words) and si < len(srt_words):
            q = quran_norm[qi]
            s = srt_words[si]

            if q == s:
                qi += 1
                si += 1
            elif si > 0 and s == srt_words[si - 1]:
                # SRT repeats its previous word: skip the repeat.
                repeat_log.append(f"↻ Duplikat di SRT: '{s}' (SRT pos {si+1})")
                si += 1
            elif qi > 0 and q == quran_norm[qi - 1]:
                # Reference repeats its previous word: skip the repeat.
                repeat_log.append(f"↻ Duplikat di Quran: '{quran_words[qi]}' (Quran pos {qi+1})")
                qi += 1
            elif si + 1 < len(srt_words) and q == srt_words[si + 1]:
                # The next SRT word matches: treat the current one as extra.
                repeat_log.append(f"↻ Kata SRT dilewati: '{s}' (SRT pos {si+1})")
                si += 1
            elif qi + 1 < len(quran_words) and quran_norm[qi + 1] == s:
                # The next reference word matches: the current one is missing in the SRT.
                repeat_log.append(f"↻ Kata Quran dilewati: '{quran_words[qi]}' (Quran pos {qi+1})")
                qi += 1
            else:
                mismatches.append((qi, quran_words[qi], srt_words[si]))
                qi += 1
                si += 1

        # --- Record whatever is left on either side ---
        while qi < len(quran_words):
            mismatches.append((qi, quran_words[qi], "∅ (kosong di SRT)"))
            qi += 1
        while si < len(srt_words):
            mismatches.append((qi, "∅ (kosong di Quran)", srt_words[si]))
            si += 1

        # --- Show the results ---
        if repeat_log:
            print("\n🔁 Pengulangan Kata Terdeteksi:")
            for log in repeat_log:
                print(f"  {log}")
        else:
            print("\n✅ Tidak ada pengulangan kata yang dilewati.")

        if mismatches:
            print("\n❌ Terdapat ketidaksesuaian (maks. 5 ditampilkan):")
            for i, q, s in mismatches[:5]:
                print(f"  ❌ Index {i+1}: Quran='{q}' ≠ SRT='{s}'")
        else:
            print("\n✅ Semua kata cocok!")

        # --- Rewrite the SRT contents ---
        # Each subtitle receives as many reference words as it originally had,
        # taken in order. Reference words beyond the SRT's total word count
        # are not written; subtitles past the end of the reference get no words.
        pointer = 0
        for entry, words in zip(srt_entries, words_per_entry):
            count = len(words)
            new_words = quran_words[pointer:pointer + count]
            pointer += count
            # Wrap in RLE ... PDF marks so the line renders right-to-left.
            entry.content = '\u202B' + ' '.join(new_words) + '\u202C'

        with open("output_updated.srt", "w", encoding="utf-8") as f:
            f.write(srt.compose(srt_entries))

        print("\n📁 File 'output_updated.srt' berhasil disimpan.")


analyze_btn.on_click(do_analysis)

# --- Show the UI: entries on top, then the action buttons, then the log ---
button_row = widgets.HBox([add_btn, analyze_btn])
display(widgets.VBox([container, button_row, output_box]))