<a href="https://colab.research.google.com/github/ryangandi46/asr_wav2vav2_whisper/blob/main/dashboard_asr_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Jalankan sekali di Colab
!pip install -q transformers datasets torchaudio accelerate jiwer psutil GPUtil librosa noisereduce soundfile unidecode streamlit pyngrok

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m42.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.8 MB/s[0m eta [36m0

In [2]:
!pip install --upgrade streamlit



In [4]:
# Mount Google Drive so the evaluation data stored there is reachable from Colab.
from google.colab import drive
drive.mount('/content/drive')

# Copy the JSON file into the Colab working directory
# (app.py opens "cleaned_data.json" by relative path).
!cp "/content/drive/MyDrive/skripsi ryan/cleaned_data.json" /content/


Mounted at /content/drive


In [5]:
# app.py
%%writefile app.py

import streamlit as st
import torch
import time
import psutil
import GPUtil
import librosa
import numpy as np
from jiwer import wer
from transformers import pipeline
import os
import pandas as pd
import json
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from textwrap import wrap
from google.colab import drive


# =============================================================================
# Page configuration and application title
# =============================================================================
st.set_page_config(layout="wide", page_title="ASR Model Evaluation Dashboard")
st.title("📊 Dasbor Evaluasi Kinerja Model ASR")
st.markdown("Evaluasi perbandingan antara model **OpenAI Whisper** dan **Cahya Wav2Vec2** untuk Pengenalan Ucapan Bahasa Indonesia.")

# =============================================================================
# Cached model loading (so the models are not reloaded on every interaction)
# =============================================================================
@st.cache_resource
def load_models():
    """Build the Whisper and Wav2Vec2 speech-recognition pipelines.

    Returns:
        A ``(whisper_pipe, w2v2_pipe)`` tuple of transformers pipelines, both
        placed on the first CUDA device when one is available, else on CPU.
    """
    if torch.cuda.is_available():
        target_device = "cuda:0"
    else:
        target_device = "cpu"

    asr_task = "automatic-speech-recognition"
    # Order matters: Whisper first, Wav2Vec2 second, matching the return order.
    checkpoints = (
        "openai/whisper-large",
        "cahya/wav2vec2-large-xlsr-indonesian",
    )
    whisper_asr, wav2vec2_asr = (
        pipeline(asr_task, model=checkpoint, device=target_device)
        for checkpoint in checkpoints
    )
    return whisper_asr, wav2vec2_asr

# Call the loader and show the loading status.
# whisper_pipe and w2v2_pipe are module-level names that main() reads.
with st.spinner("Memuat model Whisper dan Wav2Vec2... Harap tunggu sebentar."):
    whisper_pipe, w2v2_pipe = load_models()
st.success("✅ Model berhasil dimuat!")

# =============================================================================
# Loading and processing the evaluation data
# =============================================================================
@st.cache_data
def load_evaluation_data(path):
    """Load the cleaned evaluation samples from a JSON file.

    Args:
        path: Location of the JSON file to read (UTF-8 encoded).

    Returns:
        The parsed JSON content, or ``None`` when the file does not exist or
        does not contain valid JSON. In both failure cases an error message
        is shown in the app so the caller only has to check for ``None``.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        st.error(f"File data tidak ditemukan di path: {path}. Pastikan file 'cleaned_data.json' ada.")
    except json.JSONDecodeError as exc:
        # A truncated or corrupted file is reported the same way as a missing
        # one instead of crashing the app with a traceback.
        st.error(f"File data di path: {path} bukan JSON yang valid: {exc}")
    return None

# =============================================================================
# Main Streamlit application
# =============================================================================
def _transcribe_and_score(asr_pipe, audio_path, ref_text, model_label, **pipe_kwargs):
    """Transcribe one audio file, render its metrics, and return the scores.

    Args:
        asr_pipe: Callable ASR pipeline; called as ``asr_pipe(audio_path, **pipe_kwargs)``
            and expected to return a mapping with a ``"text"`` entry.
        audio_path: Path of the audio file to transcribe.
        ref_text: Reference transcript the hypothesis is scored against.
        model_label: Model name used in the error message.
        **pipe_kwargs: Extra keyword arguments forwarded to the pipeline call.

    Returns:
        ``(text, wer, seconds)``. If anything fails, the error is shown in the
        app and ``("Error", 1.0, 0.0)`` is returned so the sample is still recorded.
    """
    try:
        started = time.perf_counter()
        result = asr_pipe(audio_path, **pipe_kwargs)
        elapsed = time.perf_counter() - started

        hypothesis = result["text"].strip().lower()
        error_rate = wer(ref_text, hypothesis)

        st.text(f"Output: {hypothesis}")
        st.metric(label="Word Error Rate (WER)", value=f"{error_rate:.3f}")
        st.metric(label="Waktu Inferensi (detik)", value=f"{elapsed:.2f}")
        return hypothesis, error_rate, elapsed
    except Exception as e:
        st.error(f"Error pada {model_label}: {e}")
        return "Error", 1.0, 0.0


def _render_model_comparison(df_evaluation):
    """Render the per-model average table and the WER bar/box plots."""
    # Reshape to 'long' format (one row per sample per model) for Seaborn.
    per_model_frames = [
        df_evaluation[[f"{prefix}_wer", f"{prefix}_time"]]
        .rename(columns={f"{prefix}_wer": "wer", f"{prefix}_time": "time"})
        .assign(model=label)
        for prefix, label in (("whisper", "Whisper"), ("wav2vec2", "Wav2Vec2"))
    ]
    # ignore_index avoids duplicated index labels in the stacked frame.
    df_long = pd.concat(per_model_frames, ignore_index=True)

    summary_stats = df_long.groupby('model').agg(
        Avg_WER=('wer', 'mean'),
        Avg_Time_sec=('time', 'mean')
    ).reset_index()

    st.subheader("Perbandingan Rata-rata")
    st.dataframe(summary_stats.style.format({'Avg_WER': '{:.3f}', 'Avg_Time_sec': '{:.2f}'}))

    st.subheader("Visualisasi Perbandingan")
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Bar chart: mean WER with standard deviation
    sns.barplot(ax=axes[0], x='model', y='wer', data=df_long, palette='viridis', estimator=np.mean, errorbar='sd')
    axes[0].set_title('Rata-rata WER dengan Simpangan Baku', fontsize=14)
    axes[0].set_ylabel('WER (Word Error Rate)')
    axes[0].set_xlabel('Model')
    axes[0].grid(axis='y', linestyle='--', alpha=0.7)

    # Box plot: WER distribution
    sns.boxplot(ax=axes[1], x='model', y='wer', data=df_long, palette='viridis')
    axes[1].set_title('Distribusi WER per Model', fontsize=14)
    axes[1].set_ylabel('WER (Word Error Rate)')
    axes[1].set_xlabel('Model')
    axes[1].grid(axis='y', linestyle='--', alpha=0.7)
    axes[1].set_ylim(0, 1.5)  # Cap the y axis to keep the plot readable

    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so pyplot does
    # not keep accumulating open figures.
    plt.close(fig)


def _render_sample_distribution(df_evaluation):
    """Render the age, gender, and accent distribution of the evaluated samples."""
    df_viz = df_evaluation.copy()
    for column in ('age', 'gender', 'accent'):
        # Treat empty strings and missing values alike as 'Unknown'.
        df_viz[column] = df_viz[column].replace('', np.nan).fillna('Unknown')

    fig_viz, axes_viz = plt.subplots(1, 3, figsize=(18, 5))

    sns.countplot(ax=axes_viz[0], data=df_viz, x='age', palette='YlGnBu')
    axes_viz[0].set_title('Distribusi Umur')

    sns.countplot(ax=axes_viz[1], data=df_viz, x='gender', palette='YlOrRd')
    axes_viz[1].set_title('Distribusi Gender')

    accent_counts = df_viz['accent'].value_counts()
    # Join with a real newline so long accent labels wrap onto several lines.
    accent_labels_wrapped = ['\n'.join(wrap(str(label), 10)) for label in accent_counts.index]
    axes_viz[2].pie(accent_counts, labels=accent_labels_wrapped, autopct='%1.1f%%', startangle=90, colors=plt.cm.Set3.colors)
    axes_viz[2].set_title('Distribusi Aksen')

    # Lay out this specific figure rather than pyplot's implicit current one.
    fig_viz.tight_layout()
    st.pyplot(fig_viz)
    plt.close(fig_viz)


def main():
    """Evaluate every sample with both models and render the dashboard.

    Reads ``cleaned_data.json`` from the working directory, transcribes each
    sample with the module-level ``whisper_pipe`` and ``w2v2_pipe``, shows
    per-sample results in expanders, then renders the summary table and plots.
    Stops the script when the data cannot be loaded.
    """
    # Path to the cleaned data (adjust if needed); relative to the working directory.
    cleaned_data_path = "cleaned_data.json"

    cleaned_data = load_evaluation_data(cleaned_data_path)

    if cleaned_data is None:
        st.stop()

    N = len(cleaned_data)
    st.info(f"Mengevaluasi **{N} sampel** audio yang telah diproses.")

    eval_results = []
    progress_bar = st.progress(0)

    for i, sample_data in enumerate(cleaned_data):
        audio_path = sample_data["path"]
        ref_text = sample_data["text"]
        duration = sample_data["duration"]

        with st.expander(f"Sampel {i+1}: {os.path.basename(audio_path)} (Durasi: {duration:.2f}s)"):
            st.audio(audio_path)
            st.markdown(f"**Teks Referensi:** `{ref_text}`")

            col1, col2 = st.columns(2)

            # --- Whisper evaluation ---
            with col1:
                # Label matches the checkpoint actually loaded (openai/whisper-large).
                st.subheader("Whisper (large)")
                whisper_text, whisper_wer, whisper_time = _transcribe_and_score(
                    whisper_pipe, audio_path, ref_text, "Whisper",
                    generate_kwargs={"language": "id", "task": "transcribe"},
                )

            # --- Wav2Vec2 evaluation ---
            with col2:
                st.subheader("Wav2Vec2 (cahya)")
                w2v2_text, w2v2_wer, w2v2_time = _transcribe_and_score(
                    w2v2_pipe, audio_path, ref_text, "Wav2Vec2",
                )

            # Keep the per-sample results for the summary below.
            eval_results.append({
                "reference_text": ref_text, "duration": duration,
                "whisper_output": whisper_text, "whisper_wer": whisper_wer, "whisper_time": whisper_time,
                "wav2vec2_output": w2v2_text, "wav2vec2_wer": w2v2_wer, "wav2vec2_time": w2v2_time,
                "age": sample_data.get("age", "Unknown"), "gender": sample_data.get("gender", "Unknown"), "accent": sample_data.get("accent", "Unknown")
            })

        progress_bar.progress((i + 1) / N)

    st.success("✅ Evaluasi selesai!")

    # =============================================================================
    # Summary and visualisation of the results
    # =============================================================================
    st.header("Ringkasan Hasil Evaluasi")

    if eval_results:
        df_evaluation = pd.DataFrame(eval_results)
        _render_model_comparison(df_evaluation)

        st.subheader("Distribusi Data Sampel")
        _render_sample_distribution(df_evaluation)

# Run the main function when this file is executed as a script.
if __name__ == '__main__':
    main()

Writing app.py


In [6]:
import os
import threading
from getpass import getpass

from pyngrok import ngrok

# Port shared by the Streamlit server and the ngrok tunnel.
STREAMLIT_PORT = 8501

# Never store the ngrok auth token in the notebook: read it from the
# NGROK_AUTH_TOKEN environment variable, or prompt for it without echoing.
# The token that used to be hardcoded in this cell has been exposed in the
# notebook history and should be revoked in the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)


def run_streamlit():
    """Run the Streamlit server for app.py; blocks until the server exits."""
    os.system(f"streamlit run app.py --server.port {STREAMLIT_PORT}")


# Run Streamlit in a separate thread so this cell can continue.
thread = threading.Thread(target=run_streamlit)
thread.start()

# Open a tunnel to the Streamlit port and show the public URL.
public_url = ngrok.connect(STREAMLIT_PORT)
print("🔗 Streamlit tersedia di:", public_url)


🔗 Streamlit tersedia di: NgrokTunnel: "https://6291-34-168-213-1.ngrok-free.app" -> "http://localhost:8501"


In [37]:
# Kill all ngrok and streamlit processes that are still active
!pkill streamlit
!pkill ngrok


In [35]:
# Hentikan semua proses ngrok dan streamlit yang mungkin masih berjalan
!kill $(ps aux | grep 'ngrok|streamlit' | awk '{print $2}')

^C
