# Thai Meeting Transcription - Interactive Notebook

This notebook demonstrates how to use the Thai transcription tool interactively.

## 1. Setup and Imports

In [None]:
import whisper
from transcribe_meeting import MeetingTranscriber
from audio_utils import AudioPreprocessor
from IPython.display import Audio, display
import json

## 2. Quick Transcription (No Speakers)

In [None]:
# Path of the meeting recording to transcribe
audio_file = "meeting.mp3"

# Load the "medium" Whisper model
print("Loading Whisper model...")
model = whisper.load_model("medium")

# Transcribe as Thai; verbose=True is passed straight through to Whisper
print("Transcribing...")
result = model.transcribe(audio_file, language="th", verbose=True)

# Print the full transcript text under a 60-character banner
divider = "=" * 60
print(f"\n{divider}")
print("TRANSCRIPTION:")
print(divider)
print(result["text"])

## 3. View Segments with Timestamps

In [None]:
# Print one line per segment: "[<start>s - <end>s] <text>", times to two decimals
for segment in result["segments"]:
    print(f"[{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}")

## 4. Full Transcription with Speaker Diarization

In [None]:
import os

# Build a transcriber configured for the "medium" Whisper model and Thai audio
transcriber = MeetingTranscriber(
    whisper_model="medium",
    language="th"
)

# HuggingFace access token (create one at https://huggingface.co/settings/tokens).
# Prefer the HF_TOKEN environment variable so the secret is never saved inside
# the notebook; the placeholder is used only when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "your_token_here")

# Process the recording with speaker diarization enabled, using ./output as
# the output directory
transcriber.process_meeting(
    audio_path=audio_file,
    output_dir="./output",
    with_speakers=True,
    hf_token=HF_TOKEN
)

## 5. Load and Display Results

In [None]:
# Read the transcript segments that were saved as JSON
with open("./output/meeting_transcript.json", "r", encoding="utf-8") as transcript_file:
    segments = json.load(transcript_file)

# Walk the segments in order, printing a banner each time the speaker changes
divider = "=" * 60
current_speaker = None
for entry in segments:
    speaker = entry["speaker"]
    if speaker != current_speaker:
        current_speaker = speaker
        print(f"\n{divider}\n[{current_speaker}]\n{divider}")

    print(f"[{entry['start']:.1f}s] {entry['text']}")

## 6. Audio Preprocessing

In [None]:
# Get audio information
# Creates an AudioPreprocessor and asks it about the input recording.
# The return value is stored in `info` but is not printed in this cell.
# NOTE(review): presumably get_audio_info reports the details itself — confirm
# against audio_utils, otherwise display `info` explicitly.
preprocessor = AudioPreprocessor()
info = preprocessor.get_audio_info(audio_file)

In [None]:
# Convert to WAV
# Requests a 16000 Hz sample rate. The return value is kept in `wav_file`
# (presumably the path of the converted file — confirm against audio_utils).
wav_file = preprocessor.convert_to_wav(audio_file, sample_rate=16000)

In [None]:
# Normalize audio volume
# NOTE(review): this normalizes the original `audio_file`, not the `wav_file`
# produced by the WAV conversion cell — confirm that is intended.
normalized_file = preprocessor.normalize_audio(audio_file)

## 7. Compare Model Sizes

In [None]:
import time

# Benchmark several Whisper model sizes on the same recording.
#
# The loop deliberately uses its own names (bench_model / bench_result) rather
# than `model` / `result`: notebook cells share one namespace, and the export
# cells read `result`, so rebinding it here would make them export the output
# of the last benchmarked model instead of the main transcription.
models = ["tiny", "base", "small"]
results = {}

for model_name in models:
    print(f"\nTesting {model_name} model...")

    bench_model = whisper.load_model(model_name)

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; only the transcription call is timed, not model loading.
    start_time = time.perf_counter()
    bench_result = bench_model.transcribe(audio_file, language="th")
    elapsed = time.perf_counter() - start_time

    results[model_name] = {
        "time": elapsed,
        "text": bench_result["text"]
    }

    print(f"Time: {results[model_name]['time']:.2f}s")

# Display comparison: elapsed time plus the first 200 characters of each transcript
print("\n" + "="*60)
print("MODEL COMPARISON")
print("="*60)
for model_name, data in results.items():
    print(f"\n{model_name.upper()}: {data['time']:.2f}s")
    print(data['text'][:200] + "...")

## 8. Export to Different Formats

In [None]:
# Write the full transcript text to a UTF-8 plain-text file
with open("transcript.txt", "w", encoding="utf-8") as txt_file:
    txt_file.write(result["text"])

print("Saved to transcript.txt")

In [None]:
# Export as SRT subtitle
def _srt_timestamp(seconds):
    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The offset is rounded to whole milliseconds before being split into
    fields. Truncating the fractional part instead (``int((t % 1) * 1000)``)
    loses a millisecond to floating-point error, e.g. 2.3 s would be written
    as ``00:00:02,299``. Negative offsets are clamped to zero.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


with open("transcript.srt", "w", encoding="utf-8") as f:
    # Each SRT cue is: 1-based index, "start --> end" line, text, blank line
    for i, seg in enumerate(result["segments"], 1):
        f.write(f"{i}\n")
        f.write(f"{_srt_timestamp(seg['start'])} --> {_srt_timestamp(seg['end'])}\n")
        f.write(f"{seg['text'].strip()}\n\n")

print("Saved to transcript.srt")