In [None]:
# Google Colab Code for Transcription with Timestamps and SRT Export

# Cell 1: Install dependencies
# NOTE: the leading "!" is IPython/Colab shell-escape syntax, so these lines only
# run inside a notebook cell; a plain Python interpreter rejects them.
!pip install openai-whisper
# NOTE(review): pydub is installed here but is not imported by the import cell
# that follows — confirm whether it is still needed.
!pip install pydub
# Refresh the apt package index, then install ffmpeg without prompting (-y).
# Presumably required for audio decoding during transcription — TODO confirm.
!apt update && apt install ffmpeg -y

# Cell 2: Import libraries
import whisper
import torch
import os
import re
from datetime import timedelta
import json
from google.colab import files
import numpy as np
from IPython.display import display, HTML

# Cell 3: List files and select audio
# Extensions are compared case-insensitively so that e.g. "TALK.MP3" is listed too.
MEDIA_EXTENSIONS = ('.mp3', '.wav', '.m4a', '.mp4', '.avi', '.mov', '.flac', '.ogg', '.webm')

print("Files in current directory:")
print("="*50)
for file in sorted(os.listdir()):
    if file.lower().endswith(MEDIA_EXTENSIONS):
        print(f"{file}")
print("="*50)

audio_file = input("Enter the filename of your uploaded audio file: ").strip()

# Keep asking until the name refers to an existing regular file. Previously a
# wrong name only printed an error and execution continued, so the failure
# surfaced later (and less clearly) inside the transcription step.
while not os.path.isfile(audio_file):
    print(f"Error: File '{audio_file}' not found!")
    audio_file = input("Enter the filename of your uploaded audio file: ").strip()

print(f"File found: {audio_file}")

# Cell 4: Load Whisper model
print("\nLoading Whisper model...")

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Checkpoint name handed to whisper.load_model().
model_size = "large-v3"
model = whisper.load_model(model_size, device=device)
print(f"Model {model_size} loaded successfully!")

# Cell 5: Initial transcription (NO CONSTRAINTS)
print("\n" + "="*50)
print("PERFORMING INITIAL TRANSCRIPTION")
print("="*50)

# An empty answer becomes None, which is passed on as "no language given".
language = input("Enter language code (or press Enter for auto-detect): ").strip()
if not language:
    language = None
print("Transcribing... This may take a few minutes...")

# Request word-level timestamps; the same result object is also kept under
# `original_result` so that later exports can re-split it.
original_result = result = model.transcribe(
    audio_file,
    language=language,
    word_timestamps=True,
    verbose=False,
)

print("Transcription completed!")
print(f"Detected language: {result.get('language', 'unknown')}")
print(f"Total words: {len(result['text'].split())}")

# Cell 6: Helper functions
def format_timestamp(seconds):
    """Render a number of seconds as ``M:SS``, or ``H:MM:SS`` from one hour upwards.

    Fractions of a second are truncated rather than rounded.
    """
    whole_hours = int(seconds // 3600)
    whole_minutes = int((seconds % 3600) // 60)
    whole_seconds = int(seconds % 60)

    # The hour field (and zero-padded minutes) only appear once there is an hour.
    clock = f"{whole_minutes}:{whole_seconds:02d}"
    if whole_hours > 0:
        clock = f"{whole_hours}:{whole_minutes:02d}:{whole_seconds:02d}"
    return clock

def format_srt_timestamp(seconds):
    """Convert seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a rounding carry propagates correctly (59.9996 s becomes
    ``00:01:00,000`` rather than the invalid ``00:00:60,000``).

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The formatted timestamp string.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

def format_transcription(result, max_words=None, max_duration=None, max_chars=None):
    """Turn a transcription result into a flat list of timed text segments.

    Without constraints every entry of ``result['segments']`` is passed through
    as-is (text stripped). With at least one constraint, each segment that has
    word-level timing is re-split so that a new sub-segment starts whenever
    adding the next word would break a limit.

    Args:
        result: Dict with a 'segments' list. Each segment has 'start', 'end'
            and 'text', and optionally 'words' (dicts with 'word', 'start',
            'end').
        max_words: Maximum number of words per sub-segment (falsy = no limit).
        max_duration: Maximum sub-segment length in seconds, measured from the
            first word's start to the candidate word's end (falsy = no limit).
        max_chars: Maximum length of the sub-segment text (falsy = no limit).

    Returns:
        List of dicts with 'start', 'end' and 'text'. A single word that breaks
        a limit on its own is still emitted as its own sub-segment.
    """
    formatted_segments = []
    constrained = bool(max_words or max_duration or max_chars)

    for segment in result['segments']:
        words = segment.get('words') or []

        # No constraints, or no word timing to split on: keep the segment whole
        # (previously a segment with an empty 'words' list was silently dropped).
        if not (constrained and words):
            formatted_segments.append({
                'start': segment['start'],
                'end': segment['end'],
                'text': segment['text'].strip()
            })
            continue

        current_words = []
        current_start = None

        for word_info in words:
            # Word tokens are kept unstripped and concatenated as-is so that
            # whatever spacing they carry survives. Stripping each word and
            # joining with '' glued the words together ("Hellobig").
            raw_word = word_info['word']
            word_start = word_info['start']
            word_end = word_info['end']

            if current_start is None:
                current_start = word_start

            should_split = False
            if current_words:
                # Measure the text exactly as it would be emitted.
                candidate_text = (''.join(current_words) + raw_word).strip()
                should_split = bool(
                    (max_words and len(current_words) >= max_words)
                    or (max_duration and word_end - current_start > max_duration)
                    or (max_chars and len(candidate_text) > max_chars)
                )

            if should_split:
                # The finished sub-segment runs until the next word begins.
                formatted_segments.append({
                    'start': current_start,
                    'end': word_start,
                    'text': ''.join(current_words).strip()
                })
                current_words = [raw_word]
                current_start = word_start
            else:
                current_words.append(raw_word)

        if current_words:
            # The trailing sub-segment ends where the parent segment ends.
            formatted_segments.append({
                'start': current_start,
                'end': segment['end'],
                'text': ''.join(current_words).strip()
            })

    return formatted_segments

def create_srt(segments):
    """Serialize segments as SubRip text.

    Each segment contributes a 1-based index line, a ``start --> end`` line,
    its text, and a blank separator line.
    """
    cues = []
    for index, segment in enumerate(segments, start=1):
        begins = format_srt_timestamp(segment['start'])
        finishes = format_srt_timestamp(segment['end'])
        cues.append(f"{index}\n{begins} --> {finishes}\n{segment['text']}\n\n")
    return "".join(cues)

def export_files(formatted_segments, result, suffix=""):
    """Write the transcription as TXT, SRT, WebVTT, CSV and JSON files.

    Output names are built from the module-level ``audio_file`` with its
    extension removed, followed by the optional suffix.

    Args:
        formatted_segments: List of dicts with 'start', 'end' (seconds) and
            'text'.
        result: Transcription dict; 'text' is required, 'language' is optional
            and falls back to 'unknown'.
        suffix: Optional tag; when non-empty it is inserted as ``_<suffix>``.

    Returns:
        Dict mapping 'txt', 'srt', 'vtt', 'csv' and 'json' to the filenames
        that were written.
    """
    import csv

    base_filename = os.path.splitext(audio_file)[0]

    # Create suffix for filename if parameters are used
    if suffix:
        suffix = "_" + suffix

    # Plain text: one "(timestamp) text" line per segment
    transcription_text = "".join(
        f"({format_timestamp(seg['start'])}) {seg['text']}\n"
        for seg in formatted_segments
    )
    txt_filename = f"{base_filename}{suffix}_transcription.txt"
    with open(txt_filename, 'w', encoding='utf-8') as f:
        f.write(transcription_text)

    # SRT
    srt_content = create_srt(formatted_segments)
    srt_filename = f"{base_filename}{suffix}.srt"
    with open(srt_filename, 'w', encoding='utf-8') as f:
        f.write(srt_content)

    # WebVTT: same cue layout as SRT, but with '.' as the millisecond
    # separator. Only the timestamps are converted; doing a blanket
    # replace(',', '.') on the SRT text also rewrote every comma inside the
    # subtitle text itself.
    vtt_cues = []
    for i, seg in enumerate(formatted_segments, 1):
        start_time = format_srt_timestamp(seg['start']).replace(',', '.')
        end_time = format_srt_timestamp(seg['end']).replace(',', '.')
        vtt_cues.append(f"{i}\n{start_time} --> {end_time}\n{seg['text']}\n\n")
    vtt_content = "WEBVTT\n\n" + "".join(vtt_cues)
    vtt_filename = f"{base_filename}{suffix}.vtt"
    with open(vtt_filename, 'w', encoding='utf-8') as f:
        f.write(vtt_content)

    # CSV
    csv_filename = f"{base_filename}{suffix}.csv"
    with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Start Time', 'End Time', 'Duration', 'Text'])
        for seg in formatted_segments:
            duration = seg['end'] - seg['start']
            writer.writerow([
                format_timestamp(seg['start']),
                format_timestamp(seg['end']),
                f"{duration:.2f}s",
                seg['text']
            ])

    # JSON
    json_filename = f"{base_filename}{suffix}_full.json"
    with open(json_filename, 'w', encoding='utf-8') as f:
        json.dump({
            'segments': formatted_segments,
            'language': result.get('language', 'unknown'),
            'text': result['text']
        }, f, ensure_ascii=False, indent=2)

    return {
        'txt': txt_filename,
        'srt': srt_filename,
        'vtt': vtt_filename,
        'csv': csv_filename,
        'json': json_filename
    }

# Cell 7: Display initial transcription
print("\n" + "="*50)
print("INITIAL TRANSCRIPTION (No constraints)")
print("="*50 + "\n")

# With no limits given, the segments come back unsplit.
initial_segments = format_transcription(original_result)

# Preview only the first ten segments, then report how many were left out.
for seg in initial_segments[:10]:
    print(f"({format_timestamp(seg['start'])}) {seg['text']}")

hidden_count = len(initial_segments) - 10
if hidden_count > 0:
    print(f"\n... and {hidden_count} more segments")

print(f"\nTotal segments: {len(initial_segments)}")

# Cell 8: Interactive export function
def interactive_export():
    """Prompt for segment limits, re-split the transcription and export it.

    Reads three optional limits from the user (words, seconds, characters),
    re-formats the module-level ``original_result`` with them, prints a short
    preview, writes all export files and optionally triggers a browser
    download for each of them.

    Returns:
        Tuple ``(formatted_segments, filenames)`` where ``filenames`` maps the
        format key to the written filename.
    """
    print("\n" + "="*50)
    print("EXPORT WITH CUSTOM PARAMETERS")
    print("="*50)
    print("Enter parameters (0 or Enter for no limit):\n")

    # Invalid entries are re-prompted instead of raising ValueError, which
    # would otherwise abort the whole export loop.
    max_words = _prompt_limit("Max Words Per Segment: ", int)
    max_duration = _prompt_limit("Max Duration Per Segment (seconds): ", float)
    max_chars = _prompt_limit("Max Characters Per Segment: ", int)

    # Create suffix for filename
    suffix_parts = []
    if max_words:
        suffix_parts.append(f"w{max_words}")
    if max_duration:
        suffix_parts.append(f"d{int(max_duration)}")
    if max_chars:
        suffix_parts.append(f"c{max_chars}")
    suffix = "_".join(suffix_parts) if suffix_parts else "default"

    print(f"\nApplying constraints...")
    print(f"Max words: {max_words if max_words else 'No limit'}")
    print(f"Max duration: {max_duration if max_duration else 'No limit'} seconds")
    print(f"Max characters: {max_chars if max_chars else 'No limit'}")

    # Format with new parameters
    formatted_segments = format_transcription(original_result, max_words, max_duration, max_chars)

    # Preview
    print(f"\nPreview (first 5 segments):")
    print("-" * 40)
    for seg in formatted_segments[:5]:
        timestamp = format_timestamp(seg['start'])
        print(f"({timestamp}) {seg['text']}")

    print(f"\nTotal segments: {len(formatted_segments)}")

    # Export files
    filenames = export_files(formatted_segments, original_result, suffix)

    print("\n Files created:")
    for filename in filenames.values():
        print(f"  - {filename}")

    # Download files
    download_choice = input("\nDownload files now? (y/n): ").strip().lower()
    if download_choice == 'y':
        started = 0
        for filename in filenames.values():
            # Downloads stay best-effort (one failure must not block the
            # others), but failures are now reported instead of swallowed.
            try:
                files.download(filename)
            except Exception as exc:
                print(f"  Could not download {filename}: {exc}")
            else:
                started += 1
        if started:
            print(" Download initiated!")

    return formatted_segments, filenames


def _prompt_limit(prompt, cast):
    """Ask for one numeric limit; return None when the answer means "no limit".

    Empty input, zero and negative numbers all mean "no limit". Input that
    ``cast`` cannot parse, or that is not a finite number, is rejected and the
    question is asked again.
    """
    import math

    while True:
        raw = input(prompt).strip()
        if not raw:
            return None
        try:
            value = cast(raw)
        except ValueError:
            print(f"  '{raw}' is not a valid number. Please try again.")
            continue
        if not math.isfinite(value):
            print(f"  '{raw}' is not a valid number. Please try again.")
            continue
        return value if value > 0 else None

# Cell 9: Main export loop
print("\n" + "="*50)
print("READY FOR EXPORT")
print("="*50)
print("\nYou can now export the transcription with different parameters.")
print("Each export creates new files with different constraints.")

# One filename dict is collected per completed export.
all_exports = []

while True:
    print("\n" + "="*50)
    choice = input("\n[E]xport with parameters / [V]iew original / [Q]uit: ").strip().upper()

    if choice == 'Q':
        break

    if choice == 'E':
        segments, filenames = interactive_export()
        all_exports.append(filenames)
        continue

    if choice != 'V':
        print("Invalid choice. Please enter E, V, or Q.")
        continue

    # 'V': show the first twenty unsplit segments again.
    print("\n" + "="*50)
    print("ORIGINAL TRANSCRIPTION")
    print("="*50)
    for seg in initial_segments[:20]:
        print(f"({format_timestamp(seg['start'])}) {seg['text']}")
    if len(initial_segments) > 20:
        print(f"\n... and {len(initial_segments) - 20} more segments")

# Cell 10: Final summary
print("\n" + "="*50)
print("SESSION COMPLETE")
print("="*50)
print(f"\nTotal exports created: {len(all_exports)}")

# List the files of every export, in the order the exports were made.
if all_exports:
    print("\nAll exported files:")
    for i, export in enumerate(all_exports, 1):
        print(f"\nExport {i}:")
        for filename in export.values():
            print(f"  - {filename}")

print("\n All operations completed successfully!")
print("You can find all files in the current directory.")

# Cell 11: Quick re-export function (can be run independently after initial transcription)
def quick_export(max_words=None, max_duration=None, max_chars=None, auto_download=False):
    """Re-split and export the stored transcription without any prompts.

    The limits given are encoded into the filename suffix (``w<N>``, ``d<N>``,
    ``c<N>``, joined by underscores, or ``default`` when none is set). When
    ``auto_download`` is true, every written file is also handed to
    ``files.download``.

    Returns the tuple ``(formatted_segments, filenames)``.
    """
    # Build the filename tag from whichever limits are active.
    tags = []
    if max_words:
        tags.append(f"w{max_words}")
    if max_duration:
        tags.append(f"d{int(max_duration)}")
    if max_chars:
        tags.append(f"c{max_chars}")
    suffix = "_".join(tags) or "default"

    # Re-split the stored result, then write every output format.
    formatted_segments = format_transcription(
        original_result, max_words, max_duration, max_chars
    )
    filenames = export_files(formatted_segments, original_result, suffix)

    print(f" Exported with: words={max_words}, duration={max_duration}, chars={max_chars}")
    print(f"Files: {list(filenames.values())}")

    if not auto_download:
        return formatted_segments, filenames

    for written_file in filenames.values():
        files.download(written_file)
    return formatted_segments, filenames

# Example quick exports (uncomment to use):
# quick_export(max_words=10, auto_download=True)
# quick_export(max_duration=5, auto_download=True)
# quick_export(max_chars=100, auto_download=True)