In [None]:
import os
import sunra_client
import getpass
import json
from datetime import datetime

# Set up the API key if it is not already configured.
# A variable that is set but blank (e.g. `export SUNRA_KEY=`) is treated the
# same as a missing one, so the prompt is not silently skipped in that case.
if not os.environ.get('SUNRA_KEY', '').strip():
    # getpass prompts without echoing the typed key back to the screen.
    # Surrounding whitespace from a copy/paste is removed before storing.
    api_key = getpass.getpass("Enter your Sunra API key: ").strip()
    os.environ['SUNRA_KEY'] = api_key

# Configure the client with whatever key is now present in the environment.
sunra_client.config(credentials=os.environ['SUNRA_KEY'].strip())
print("✓ Sunra client configured successfully!")


In [None]:
def _print_transcription_details(result):
    """Print a human-readable report for a transcription result mapping.

    Only the keys this notebook looks for are reported ('text', 'segments',
    'events', 'language', 'confidence'); any of them may be absent.
    NOTE(review): the exact response schema of the endpoint is not visible
    here - confirm these key names against the service documentation.
    """
    print("\n" + "="*50)
    print("TRANSCRIPTION RESULTS")
    print("="*50)

    # Display the main transcript
    if result.get('text'):
        print(f"\nTranscript:\n{result['text']}")

    # Display additional details if available
    segments = result.get('segments')
    if segments:
        print(f"\nNumber of segments: {len(segments)}")

        # Show only the first three segments as examples
        print("\nFirst few segments:")
        for number, segment in enumerate(segments[:3], 1):
            print(f"  Segment {number}:")
            print(f"    Text: {segment.get('text', 'N/A')}")
            print(f"    Start: {segment.get('start', 'N/A')}s")
            print(f"    End: {segment.get('end', 'N/A')}s")
            speaker = segment.get('speaker')
            # Explicit comparison so that a numeric speaker id of 0 is shown.
            if speaker not in (None, ''):
                print(f"    Speaker: {speaker}")
            print()

    # Show any detected events (first five only)
    events = result.get('events')
    if events:
        print(f"Audio events detected: {len(events)}")
        for event in events[:5]:
            print(f"  - {event}")

    # Show language detection if available
    if result.get('language'):
        print(f"Detected language: {result['language']}")

    # Show the confidence score if available; a score of 0 is still a score,
    # so test for presence rather than truthiness.
    confidence = result.get('confidence')
    if confidence is not None:
        print(f"Confidence score: {confidence}")


def transcribe_audio(audio_url, language="English", tag_audio_events=True, 
                     speaker_diarization=False, show_details=True):
    """
    Transcribe audio from a URL via the "elevenlabs/scribe-v1/speech-to-text"
    endpoint and optionally print a report of the result.

    Args:
        audio_url (str): URL of the audio file
        language (str): Language of the audio
        tag_audio_events (bool): Whether to tag audio events (music, applause, etc.)
        speaker_diarization (bool): Whether to identify different speakers
        show_details (bool): Whether to show detailed results

    Returns:
        The value returned by ``sunra_client.subscribe`` on success, or
        ``None`` if the request or the report printing raised an exception
        (the error message is printed rather than propagated).
    """

    print(f"Transcribing audio from: {audio_url}")
    print(f"Language: {language}")
    print(f"Tag audio events: {tag_audio_events}")
    print(f"Speaker diarization: {speaker_diarization}")
    print("Please wait...")

    try:
        result = sunra_client.subscribe(
            "elevenlabs/scribe-v1/speech-to-text",
            arguments={
                "audio": audio_url,
                "language": language,
                "tag_audio_events": tag_audio_events,
                "speaker_diarization": speaker_diarization
            },
            with_logs=True,
            on_enqueue=lambda req_id: print(f"✓ Request enqueued: {req_id}"),
            on_queue_update=lambda status: print(f"Status: {status}"),
        )

        if show_details:
            _print_transcription_details(result)

        return result

    except Exception as e:
        # Deliberate best-effort: the notebook keeps running and callers
        # check for a None result instead of handling exceptions.
        print(f"❌ Error transcribing audio: {e}")
        return None

def format_transcript_with_timestamps(result):
    """Format transcript segments as one "[MM:SS-MM:SS] [speaker]: text" line each.

    Args:
        result: Mapping with a 'segments' list of per-segment mappings that
            may carry 'start', 'end', 'text' and 'speaker'. May be None.

    Returns:
        str: The formatted lines joined with newlines, or the string
        "No segments available" when there is no result or no segments.
    """
    if not result or not result.get('segments'):
        return "No segments available"

    def _mmss(seconds):
        # `.get(key, 0)` does not help when the key is present with a null
        # value, so missing and null timestamps are both treated as 0 here.
        seconds = seconds or 0
        return f"{int(seconds // 60):02d}:{int(seconds % 60):02d}"

    formatted_lines = []

    for segment in result['segments']:
        start_formatted = _mmss(segment.get('start'))
        end_formatted = _mmss(segment.get('end'))
        text = segment.get('text') or ''

        # Compare explicitly against None/'' so a numeric speaker id of 0
        # still gets its label.
        speaker = segment.get('speaker')
        speaker_label = f" [{speaker}]" if speaker not in (None, '') else ""

        formatted_lines.append(f"[{start_formatted}-{end_formatted}]{speaker_label}: {text}")

    return "\n".join(formatted_lines)

# Confirmation message shown once this cell's helper definitions have run.
print("Helper functions ready!")


In [None]:
# Simplest run: transcribe the hosted sample clip with audio-event tagging
# enabled and speaker diarization disabled.
sample_audio = "https://assets.sunra.ai/uploads/1749243418768-74d68e25.wav"

print("Basic transcription example:")
result = transcribe_audio(
    sample_audio,
    language="English",
    tag_audio_events=True,
    speaker_diarization=False,
)


In [None]:
# Same sample clip again, this time requesting speaker labels.
print("Transcription with speaker diarization:")
result_with_speakers = transcribe_audio(
    sample_audio,
    language="English",
    tag_audio_events=True,
    speaker_diarization=True,
)

# transcribe_audio returns None on failure, so only format a real result.
if result_with_speakers:
    print(
        "\n" + "="*50,
        "FORMATTED TRANSCRIPT WITH TIMESTAMPS",
        "="*50,
        sep="\n",
    )
    formatted_transcript = format_transcript_with_timestamps(result_with_speakers)
    print(formatted_transcript)


In [None]:
# Run the same clip twice, toggling only tag_audio_events, then show the two
# transcripts one after the other.
print("=== Without Audio Event Tagging ===")
result_no_events = transcribe_audio(
    sample_audio,
    language="English",
    tag_audio_events=False,
    speaker_diarization=False,
    show_details=False,
)

print("\n=== With Audio Event Tagging ===")
result_with_events = transcribe_audio(
    sample_audio,
    language="English",
    tag_audio_events=True,
    speaker_diarization=False,
    show_details=False,
)

# Comparison banner
print("\n" + "="*50, "COMPARISON", "="*50, sep="\n")

# Either run may have failed (None), so each side is reported independently.
if result_no_events:
    print("Without events:")
    print(result_no_events.get('text', 'No text available'))

if result_with_events:
    print("\nWith events:")
    print(result_with_events.get('text', 'No text available'))

    detected_events = result_with_events.get('events')
    if detected_events:
        print(f"\nDetected events: {detected_events}")


In [None]:
# Language names used by this notebook; the last entry asks the service to
# detect the language itself.
supported_languages = [
    "English", "Spanish", "French", "German", "Italian",
    "Portuguese", "Dutch", "Polish", "Russian", "Chinese",
    "Japanese", "Korean", "Arabic", "Hindi",
    "Auto",  # Automatic language detection
]

print("Supported languages:")
print("\n".join(
    f"{position:2d}. {name}"
    for position, name in enumerate(supported_languages, 1)
))

# Let the service pick the language instead of naming one.
print("\n=== Using Automatic Language Detection ===")
result_auto = transcribe_audio(
    sample_audio,
    language="Auto",
    tag_audio_events=True,
    speaker_diarization=False,
    show_details=False,
)

# A failed request yields None, in which case nothing is reported.
if result_auto:
    detected = result_auto.get('language', 'Unknown')
    transcript = result_auto.get('text', 'No text available')
    print(f"Detected language: {detected}")
    print(f"Transcript: {transcript}")


In [None]:
def check_audio_url(audio_url, timeout=10, max_size_mb=25):
    """Check if an audio URL is accessible, using a HEAD request.

    Prints the reported content type and, when the server sends a usable
    Content-Length header, the file size plus a warning for large files.

    Args:
        audio_url (str): URL to probe.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout the request could block the notebook forever.
        max_size_mb (float): Size in MB above which the large-file warning
            is printed.

    Returns:
        bool: True if the final response status is 200, False on any other
        status or on any error (missing `requests`, network failure, ...).
    """
    try:
        import requests

        print(f"Checking audio URL: {audio_url}")
        # HEAD checks the resource without downloading it. Redirects are not
        # followed for HEAD unless requested, which would make a redirected
        # but perfectly reachable file look like an HTTP error.
        response = requests.head(audio_url, allow_redirects=True, timeout=timeout)

        if response.status_code != 200:
            print(f"❌ HTTP Error: {response.status_code}")
            return False

        print("✓ Audio URL is accessible!")

        # Check content type if available
        content_type = response.headers.get('content-type', 'Unknown')
        print(f"Content type: {content_type}")

        # Check file size if available
        content_length = response.headers.get('content-length')
        if content_length:
            try:
                size_mb = int(content_length) / (1024 * 1024)
            except (TypeError, ValueError):
                # A malformed header says nothing about accessibility, so it
                # must not turn a successful check into a failure.
                size_mb = None

            if size_mb is not None:
                print(f"File size: {size_mb:.2f} MB")

                if size_mb > max_size_mb:  # Many APIs have size limits
                    print("⚠️  Warning: Large file - may need to be split for some services")

        return True

    except Exception as e:
        # Best-effort helper: report the problem and signal "not accessible".
        print(f"❌ Error checking URL: {e}")
        return False

# General guidance printed for the reader: format limits first, then places
# where an audio file can be hosted so that it is reachable by URL.
print("Audio Format Requirements:")
for requirement in (
    "- Supported formats: MP3, WAV, FLAC, M4A, OGG",
    "- Maximum file size: Usually 25MB (varies by service)",
    "- Sample rate: 16kHz or higher recommended",
    "- Bit depth: 16-bit or higher",
    "- Duration: Up to 30 minutes typically",
):
    print(requirement)

print("\nFile Upload Options:")
for hosting_option in (
    "1. Cloud storage (Google Drive, Dropbox, etc.)",
    "2. Audio hosting services (SoundCloud, etc.)",
    "3. Your own web server",
    "4. GitHub (for public repositories)",
):
    print(hosting_option)

# Probe the hosted sample clip to demonstrate the checker.
print("\n=== Testing Sample Audio URL ===")
sample_url = "https://assets.sunra.ai/uploads/1749243418768-74d68e25.wav"
check_audio_url(sample_url)

# Uncomment to test your own audio file
# print("\n=== Testing Your Audio URL ===")
# your_audio_url = "YOUR_AUDIO_URL_HERE"
# check_audio_url(your_audio_url)


In [None]:
def create_meeting_transcript(audio_url):
    """Create a formatted meeting transcript with speakers and timestamps.

    Transcribes `audio_url` with speaker diarization and event tagging
    enabled, then renders a header, the segments grouped under a
    "[MM:SS] speaker:" heading each time the speaker changes, and a summary
    (duration, speakers in order of first appearance, segment count and any
    audio events).

    Args:
        audio_url (str): URL of the meeting recording.

    Returns:
        str: The formatted transcript, or "Transcription failed" when
        transcribe_audio returns nothing.
    """
    print("Creating meeting transcript...")

    result = transcribe_audio(
        audio_url, 
        language="English", 
        tag_audio_events=True, 
        speaker_diarization=True,
        show_details=False
    )

    if not result:
        return "Transcription failed"

    def _mmss(seconds):
        # Missing and null timestamps are both rendered as 00:00.
        seconds = seconds or 0
        return f"{int(seconds // 60):02d}:{int(seconds % 60):02d}"

    # Create formatted output
    rule = "=" * 60
    output = [
        rule,
        "MEETING TRANSCRIPT",
        rule,
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
    ]

    segments = result.get('segments') or []
    speakers = []        # unique speakers, kept in order of first appearance
    total_duration = 0   # largest segment end time seen
    current_speaker = None

    for segment in segments:
        raw_speaker = segment.get('speaker')
        # Normalise to text: null/empty labels become 'Unknown', and
        # non-string ids (e.g. integers) stay usable in the join below.
        speaker = 'Unknown' if raw_speaker in (None, '') else str(raw_speaker)
        text = (segment.get('text') or '').strip()

        total_duration = max(total_duration, segment.get('end') or 0)
        if speaker not in speakers:
            speakers.append(speaker)

        # New speaker or first segment
        if speaker != current_speaker:
            output.append(f"\n[{_mmss(segment.get('start'))}] {speaker}:")
            current_speaker = speaker

        output.append(f"  {text}")

    # Add summary
    output.append("\n" + rule)
    output.append("SUMMARY")
    output.append(rule)

    if segments:
        output.append(f"Duration: {_mmss(total_duration)}")
        output.append(f"Speakers: {', '.join(speakers)}")
        output.append(f"Total segments: {len(segments)}")

    events = result.get('events')
    if events:
        # Events are not guaranteed to be plain strings, so stringify each.
        output.append(f"Audio events: {', '.join(str(event) for event in events)}")

    return "\n".join(output)

def create_interview_transcript(audio_url):
    """Create a formatted interview transcript in a simple Q&A layout.

    Transcribes `audio_url` with speaker diarization enabled (event tagging
    off, to focus on speech) and emits one "Q: ..." or "A: ..." line per
    segment, each followed by a blank line.

    Labelling is a heuristic: the first labelled speaker in the recording is
    taken to be the interviewer ("Q:") and every other speaker an
    interviewee ("A:"). Segments that carry no speaker label fall back to
    alternating Q/A by position.

    Args:
        audio_url (str): URL of the interview recording.

    Returns:
        str: The formatted transcript, or "Transcription failed" when
        transcribe_audio returns nothing.
    """
    print("Creating interview transcript...")

    result = transcribe_audio(
        audio_url, 
        language="English", 
        tag_audio_events=False,  # Focus on speech
        speaker_diarization=True,
        show_details=False
    )

    if not result:
        return "Transcription failed"

    # Create Q&A format
    rule = "=" * 60
    output = [
        rule,
        "INTERVIEW TRANSCRIPT",
        rule,
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
    ]

    segments = result.get('segments') or []

    # The first segment that actually has a speaker label defines who asks
    # the questions.
    interviewer = next(
        (seg.get('speaker') for seg in segments if seg.get('speaker') not in (None, '')),
        None,
    )

    for position, segment in enumerate(segments, 1):
        speaker = segment.get('speaker')
        text = (segment.get('text') or '').strip()

        if speaker in (None, ''):
            # No diarization label for this segment: alternate by position.
            label = "Q:" if position % 2 == 1 else "A:"
        else:
            # Labelling by speaker keeps consecutive segments from the same
            # person under the same label.
            label = "Q:" if speaker == interviewer else "A:"

        output.append(f"{label} {text}")
        output.append("")

    return "\n".join(output)

# Overview of the formatting styles, followed by a live run of the meeting
# formatter on the hosted sample clip.
print("Advanced transcript formatting examples:")
for description in (
    "1. Meeting transcript with speakers and timestamps",
    "2. Interview transcript in Q&A format",
    "3. Lecture transcript with events",
):
    print(description)

print("\n=== Sample Meeting Transcript ===")
sample_audio = "https://assets.sunra.ai/uploads/1749243418768-74d68e25.wav"
meeting_transcript = create_meeting_transcript(sample_audio)

# Keep the cell output short: show at most the first 500 characters and mark
# the cut with an ellipsis.
if len(meeting_transcript) > 500:
    print(meeting_transcript[:500] + "...")
else:
    print(meeting_transcript)
