In [1]:
!pip install -q openai-whisper
!pip install -q transformers sentencepiece accelerate

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m803.2/803.2 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone


In [2]:
## Whisper large + FLAN-T5-large, "comprehensive" strategy: best quality

"""
Smart T5 Abstractive Audio Summarizer
Handles ANY audio length (30 seconds to 3+ hours)
Uses Google's T5 model with intelligent adaptive summarization

and

Smart T5 Large Audio to Professional Documents Converter
Uses: Whisper Large + FLAN-T5-Large
Outputs: BRD, Purchase Order, and other business documents
Optimized for Kaggle with GPU acceleration
"""

import whisper
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import os
import gc
import re
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')


# ============================================================================
# INSTALLATION (Run in Kaggle first cell):
# !pip install -q openai-whisper transformers sentencepiece accelerate
# ============================================================================


class SmartT5LargeDocumentGenerator:
    """
    Intelligent T5-based audio summarizer with adaptive length control.
    Suitable for both short (30 sec) and long (3+ hours) audio.

    Complete pipeline: Audio → T5 Large Summary → Professional Documents
    Handles: BRD, Purchase Orders, Meeting Minutes, Technical Specs
    """
    # Clear memory
    torch.cuda.empty_cache()
    gc.collect()
    
    def __init__(self, whisper_model="large", t5_model="google/flan-t5-large"):
        """
        Load the Whisper transcription model and the T5 summarization model.

        The compute device is chosen automatically: "cuda" when
        ``torch.cuda.is_available()`` is true, otherwise "cpu". On CUDA the T5
        weights are requested in float16, on CPU in float32.

        Args:
            whisper_model: 'tiny', 'base', 'small', 'medium', 'large'
            t5_model: Choose from:
                - 't5-small' (Fast, 60M params, good for short audio)
                - 't5-base' (Balanced, 220M params, recommended)
                - 't5-large' (Best quality, 770M params, slower)
                - 't5-3b' (Highest quality, 3B params, very slow)
                - 'google/flan-t5-base' (Instruction-tuned, excellent)
                - 'google/flan-t5-large' (Best instruction-following)

        Attributes set:
            device, whisper_model, tokenizer, model, is_flan
        """
        cuda_ready = torch.cuda.is_available()
        self.device = "cuda" if cuda_ready else "cpu"
        print(f"üîß Device: {self.device}")

        if cuda_ready:
            print(f"üöÄ GPU: {torch.cuda.get_device_name(0)}")
            total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
            print(f"üíæ GPU Memory: {total_gb:.2f} GB")

        # Speech-to-text model
        print(f"\nüì• Loading Whisper '{whisper_model}'...")
        self.whisper_model = whisper.load_model(whisper_model, device=self.device)
        print("‚úÖ Whisper Large loaded!")

        # Summarization model and its tokenizer
        print(f"\nüì• Loading T5 '{t5_model}'...")
        self.tokenizer = T5Tokenizer.from_pretrained(t5_model, legacy=False)
        weights_dtype = torch.float16 if cuda_ready else torch.float32
        seq2seq = T5ForConditionalGeneration.from_pretrained(
            t5_model,
            dtype=weights_dtype
        )
        self.model = seq2seq.to(self.device)
        print("‚úÖ T5 loaded!")

        # FLAN checkpoints accept free-form instructions; plain T5 needs the
        # fixed "summarize:" prefix (see generate_t5_summary).
        self.is_flan = "flan" in t5_model.lower()

        banner = "=" * 70
        print("\n" + banner)
        print("‚ú® Smart T5 Audio Summarizer Ready!")
        print(banner + "\n")
    
    def transcribe_audio(self, audio_path):
        
        """Transcribe multilingual audio to English"""
        
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"‚ùå File not found: {audio_path}")
        
        file_size = os.path.getsize(audio_path) / (1024 * 1024)
        print(f"üéµ Audio: {os.path.basename(audio_path)} ({file_size:.2f} MB)")
        print(f"‚è≥ Transcribing...\n")
        
        result = self.whisper_model.transcribe(
            audio_path,
            task='translate',
            language=None,
            fp16=self.device == "cuda",
            verbose=False,
            beam_size=5,
            best_of=5,
            temperature=0.0
        )
        
        lang_map = {
            'hi': 'Hindi (‡§π‡§ø‡§®‡•ç‡§¶‡•Ä)',
            'en': 'English',
            'mr': 'Marathi (‡§Æ‡§∞‡§æ‡§†‡•Ä)'
        }
        
        detected = result.get('language', 'unknown')
        text = result['text'].strip()
        word_count = len(text.split())
        
        print(f"‚úÖ Transcription complete!")
        print(f"üåç Language: {lang_map.get(detected, detected)}")
        print(f"üìù Words: {word_count}")
        #print(f"üìè Characters: {len(text)}\n")
        
        return {
            'text': text,
            'language': detected,
            'language_name': lang_map.get(detected, detected),
            'word_count': word_count
        }
    
    def calculate_adaptive_summary_length(self, word_count, strategy):
        """
        Intelligent adaptive summary length calculation
        Optimized for T5 model (works for ANY audio length)
        
        Args:
            word_count: Number of words in transcription
            strategy: 'ultra_concise', 'concise', 'balanced', 'detailed', 'comprehensive'
        
        Returns:
            dict with recommended parameters
        """
        
        # T5-optimized strategies
        strategies = {
            'ultra_concise': {
                'base_ratio': 0.12,
                'min_words': 12,
                'max_words': 60,
                'description': 'Single sentence summaries'
            },
            'concise': {
                'base_ratio': 0.20,
                'min_words': 20,
                'max_words': 100,
                'description': 'Brief, punchy summaries'
            },
            'balanced': {
                'base_ratio': 0.30,
                'min_words': 30,
                'max_words': 180,
                'description': 'Balanced detail and brevity'
            },
            'detailed': {
                'base_ratio': 0.45,
                'min_words': 50,
                'max_words': 300,
                'description': 'Comprehensive coverage'
            },
            'comprehensive': {
                'base_ratio': 0.60,
                'min_words': 80,
                'max_words': 450,
                'description': 'Extensive detail'
            },
            # ‚ú® NEW: HYBRID STRATEGY (detailed + comprehensive)
            'hybrid': {
                'base_ratio': 0.525,  # Average of 0.45 and 0.60
                'min_words': 65,      # Average of 50 and 80
                'max_words': 375,     # Average of 300 and 450
                'description': 'Hybrid: detailed + comprehensive'
            }
        }
        
        config = strategies.get(strategy, strategies[strategy])
        
        # Adaptive calculation based on input length
        if word_count < 40:
            # Very short (< 30 seconds)
            max_words = max(config['min_words'], int(word_count * 0.85))
            min_words = max(8, int(word_count * 0.5))
            ratio = 0.85
            
        elif word_count < 120:
            # Short (30 sec - 1 min)
            max_words = max(config['min_words'], int(word_count * 0.65))
            min_words = max(12, int(word_count * 0.35))
            ratio = 0.65
            
        elif word_count < 250:
            # Medium short (1-2 min)
            max_words = int(word_count * 0.50)
            min_words = int(word_count * 0.25)
            ratio = 0.50
            
        elif word_count < 600:
            # Medium (2-5 min)
            max_words = int(word_count * config['base_ratio'])
            min_words = int(word_count * (config['base_ratio'] * 0.45))
            ratio = config['base_ratio']
            
        elif word_count < 1500:
            # Long (5-15 min)
            max_words = int(word_count * (config['base_ratio'] * 0.95))
            min_words = int(word_count * (config['base_ratio'] * 0.40))
            ratio = config['base_ratio'] * 0.95
            
        elif word_count < 4000:
            # Very long (15-45 min)
            max_words = int(word_count * (config['base_ratio'] * 0.85))
            min_words = int(word_count * (config['base_ratio'] * 0.35))
            ratio = config['base_ratio'] * 0.85
            
        else:
            # Extra long (45+ min)
            max_words = int(word_count * (config['base_ratio'] * 0.75))
            min_words = int(word_count * (config['base_ratio'] * 0.30))
            ratio = config['base_ratio'] * 0.75
        
        # Apply strategy limits
        max_words = min(max_words, config['max_words'])
        max_words = max(max_words, config['min_words'])
        
        min_words = min(min_words, max_words - 8)
        min_words = max(min_words, 8)
        
        # T5 uses tokens (roughly 1 word = 1.5 tokens)
        max_tokens = int(max_words * 1.5)
        min_tokens = int(min_words * 1.5)
        
        return {
            'max_length': max_tokens,
            'min_length': min_tokens,
            'max_words': max_words,
            'min_words': min_words,
            'ratio': ratio,
            'strategy': strategy,
            'description': config['description']
        }
    
    def generate_t5_summary(
        self, 
        text, 
        max_length, 
        min_length, 
        quality,
        custom_instruction=None
    ):
        """
        Generate abstractive summary using T5
        
        Args:
            text: Input text
            max_length: Maximum tokens
            min_length: Minimum tokens
            quality: 'fast', 'medium', 'high', 'best'
            custom_instruction: Optional custom instruction for FLAN-T5
        """
        
        # Quality to num_beams mapping
        beam_config = {
            'fast': 2,
            'medium': 4,
            'high': 6,
            'best': 10
        }
        num_beams = beam_config.get(quality, 10)
        
        # Prepare input with T5 prefix
        if custom_instruction and self.is_flan:
            # FLAN-T5 works better with instructions
            input_text = f"{custom_instruction}: {text}"
        else:
            # Standard T5 prefix
            input_text = f"summarize: {text}"
        
        # Tokenize
        inputs = self.tokenizer(
            input_text,
            return_tensors="pt",
            max_length=512,  # T5 input limit
            truncation=True,
            padding=True
        ).to(self.device)
        
        # Generate summary
        with torch.no_grad():
            summary_ids = self.model.generate(
                inputs["input_ids"],
                max_length=max_length,
                min_length=min_length,
                num_beams=num_beams,
                length_penalty=1.5,
                early_stopping=True,
                no_repeat_ngram_size=3,
                repetition_penalty=1.2,
                temperature=1.0
            )
        
        # Decode
        summary = self.tokenizer.decode(
            summary_ids[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True
        )
        
        return summary
        
    def extract_structured_info(self, summary_text):
        
        """Extract structured information from summary"""
        
        info = {
            'requirements': [],
            'decisions': [],
            'action_items': [],
            'timeline': [],
            'budget': [],
            'risks': [],
            'technical': [],
            'deliverables': [],
            'stakeholders': []
        }
        
        sentences = re.split(r'[.!?]+', summary_text)
        
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            
            lower = sentence.lower()
            
            # Requirements
            if any(w in lower for w in ['require', 'need', 'must', 'should', 'shall', 'expect']):
                info['requirements'].append(sentence)
            
            # Decisions
            if any(w in lower for w in ['decide', 'agreed', 'approved', 'confirmed', 'finalized']):
                info['decisions'].append(sentence)
            
            # Action items
            if any(w in lower for w in ['will', 'task', 'action', 'assign', 'responsible', 'owner']):
                info['action_items'].append(sentence)
            
            # Timeline
            if any(w in lower for w in ['deadline', 'timeline', 'date', 'week', 'month', 'schedule', 'due']):
                info['timeline'].append(sentence)
            
            # Budget
            if any(w in lower for w in ['cost', 'budget', 'price', 'payment', 'fund', 'expense', '$', 'rs', 'rupee', 'inr']):
                info['budget'].append(sentence)
            
            # Risks
            if any(w in lower for w in ['risk', 'concern', 'issue', 'challenge', 'problem', 'blocker']):
                info['risks'].append(sentence)
            
            # Technical
            if any(w in lower for w in ['technical', 'technology', 'system', 'platform', 'api', 'database', 'infrastructure']):
                info['technical'].append(sentence)
            
            # Deliverables
            if any(w in lower for w in ['deliver', 'output', 'product', 'feature', 'component', 'milestone']):
                info['deliverables'].append(sentence)
            
            # Stakeholders
            if any(w in lower for w in ['stakeholder', 'team', 'department', 'client', 'customer', 'vendor']):
                info['stakeholders'].append(sentence)
        
        return info

    def generate_brd(self, summary_text, structured_info, metadata):
        """Generate Business Requirements Document"""
        
        doc = f"""
{'='*80}
BUSINESS REQUIREMENTS DOCUMENT (BRD)
{'='*80}

Document Information:
--------------------
Project Name:     {metadata.get('project_name', 'Audio Extracted Project')}
Document Date:    {metadata.get('date', datetime.now().strftime('%Y-%m-%d'))}
Version:          {metadata.get('version', '1.0')}
Prepared By:      {metadata.get('author', 'T5 Large Audio Analysis System')}
Status:           {metadata.get('status', 'Draft - Extracted from Audio')}
Department:       {metadata.get('department', 'TBD')}
Sponsor:          {metadata.get('sponsor', 'TBD')}


1. EXECUTIVE SUMMARY
{'='*80}

{summary_text}


2. BUSINESS OBJECTIVES
{'='*80}

Based on the audio discussion, the key business objectives are:

"""
        
        # Add objectives from summary
        if structured_info['requirements']:
            for idx, req in enumerate(structured_info['requirements'][:5], 1):
                doc += f"OBJ-{idx}: {req}\n"
        else:
            doc += "Business objectives to be refined based on stakeholder review.\n"
        
        doc += f"""

3. BUSINESS REQUIREMENTS
{'='*80}

"""
        
        if structured_info['requirements']:
            for idx, req in enumerate(structured_info['requirements'], 1):
                doc += f"BR-{idx:03d}: {req}\n"
                doc += f"         Priority: {metadata.get('priority', 'Medium')}\n"
                doc += f"         Status: New\n"
                doc += f"         Source: Audio Discussion\n\n"
        else:
            doc += "Business requirements extracted from executive summary above.\n"
        
        doc += f"""

4. FUNCTIONAL REQUIREMENTS
{'='*80}

"""
        
        if structured_info['technical']:
            for idx, tech in enumerate(structured_info['technical'], 1):
                doc += f"FR-{idx:03d}: {tech}\n"
                doc += f"         Category: {metadata.get('category', 'Technical')}\n"
                doc += f"         Priority: {metadata.get('priority', 'Medium')}\n\n"
        else:
            doc += "Functional requirements to be detailed in technical specification.\n"
        
        doc += f"""

5. STAKEHOLDERS
{'='*80}

"""
        
        if structured_info['stakeholders']:
            doc += "Stakeholders identified in discussion:\n\n"
            for stakeholder in structured_info['stakeholders']:
                doc += f"‚Ä¢ {stakeholder}\n"
        else:
            doc += f"""
Primary Stakeholders:
‚Ä¢ Project Sponsor: {metadata.get('sponsor', 'TBD')}
‚Ä¢ Business Owner: {metadata.get('business_owner', 'TBD')}
‚Ä¢ Project Manager: {metadata.get('pm', 'TBD')}
‚Ä¢ End Users: {metadata.get('end_users', 'As discussed in audio')}
"""
        
        doc += f"""

6. KEY DECISIONS
{'='*80}

"""
        
        if structured_info['decisions']:
            for idx, decision in enumerate(structured_info['decisions'], 1):
                doc += f"D{idx}. {decision}\n"
                doc += f"    Date: {metadata.get('date', 'TBD')}\n"
                doc += f"    Decision Maker: {metadata.get('decision_maker', 'TBD')}\n\n"
        else:
            doc += "Key decisions documented in executive summary.\n"
        
        doc += f"""

7. SCOPE
{'='*80}

In Scope:
"""
        
        if structured_info['deliverables']:
            for deliverable in structured_info['deliverables']:
                doc += f"‚Ä¢ {deliverable}\n"
        else:
            doc += "‚Ä¢ As defined in requirements above\n"
        
        doc += """

Out of Scope:
‚Ä¢ Items not mentioned in the audio discussion
‚Ä¢ Features to be considered for future phases

"""
        
        doc += f"""

8. TIMELINE & MILESTONES
{'='*80}

"""
        
        if structured_info['timeline']:
            for milestone in structured_info['timeline']:
                doc += f"‚Ä¢ {milestone}\n"
        else:
            doc += f"""
Project Timeline:
‚Ä¢ Requirements Phase: {metadata.get('req_phase', 'TBD')}
‚Ä¢ Design Phase: {metadata.get('design_phase', 'TBD')}
‚Ä¢ Development Phase: {metadata.get('dev_phase', 'TBD')}
‚Ä¢ Testing Phase: {metadata.get('test_phase', 'TBD')}
‚Ä¢ Deployment: {metadata.get('deployment', 'TBD')}
"""
        
        doc += f"""

9. BUDGET & RESOURCES
{'='*80}

"""
        
        if structured_info['budget']:
            for budget_item in structured_info['budget']:
                doc += f"‚Ä¢ {budget_item}\n"
        else:
            doc += f"""
Estimated Budget: {metadata.get('budget', 'To be determined')}

Resource Requirements:
‚Ä¢ Team Size: {metadata.get('team_size', 'TBD')}
‚Ä¢ Duration: {metadata.get('duration', 'TBD')}
‚Ä¢ External Resources: {metadata.get('external_resources', 'TBD')}
"""
        
        doc += f"""

10. RISKS & ASSUMPTIONS
{'='*80}

Risks Identified:
"""
        
        if structured_info['risks']:
            for idx, risk in enumerate(structured_info['risks'], 1):
                doc += f"{idx}. {risk}\n"
                doc += f"   Impact: {metadata.get('risk_impact', 'Medium')}\n"
                doc += f"   Mitigation: To be defined\n\n"
        else:
            doc += "Risk assessment to be conducted during project planning.\n"
        
        doc += """

Assumptions:
‚Ä¢ Resources will be available as per project timeline
‚Ä¢ Stakeholder approvals will be obtained in timely manner
‚Ä¢ Technical infrastructure is available and ready

"""
        
        doc += f"""

11. DEPENDENCIES
{'='*80}

‚Ä¢ Dependencies identified in audio discussion
‚Ä¢ External systems and integrations as required
‚Ä¢ Third-party services and vendors as needed


12. SUCCESS CRITERIA
{'='*80}

The project will be considered successful when:

‚Ä¢ All business requirements are met
‚Ä¢ System is deployed and operational
‚Ä¢ User acceptance testing is completed successfully
‚Ä¢ Stakeholders sign off on deliverables


13. APPROVAL
{'='*80}

This document has been reviewed and approved by:


Business Owner: _____________________    Date: ___________

Signature:      _____________________


Project Sponsor: ____________________    Date: ___________

Signature:       ____________________


{'='*80}
Document Generated from Audio Analysis using Whisper Large + FLAN-T5 Large
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
{'='*80}
"""
        
        return doc
    
    def generate_purchase_order(self, summary_text, structured_info, metadata):
        """Generate Purchase Order"""
        
        doc = f"""
{'='*80}
PURCHASE ORDER
{'='*80}

PO Number:        {metadata.get('po_number', 'PO-' + datetime.now().strftime('%Y%m%d-%H%M'))}
Date:             {metadata.get('date', datetime.now().strftime('%Y-%m-%d'))}
Status:           {metadata.get('status', 'Draft - Extracted from Audio')}


VENDOR INFORMATION:
{'='*80}
Vendor Name:      {metadata.get('vendor_name', 'TBD - As per audio discussion')}
Vendor Code:      {metadata.get('vendor_code', 'TBD')}
Address:          {metadata.get('vendor_address', 'TBD')}
City/State/ZIP:   {metadata.get('vendor_location', 'TBD')}
Contact Person:   {metadata.get('vendor_contact', 'TBD')}
Phone:            {metadata.get('vendor_phone', 'TBD')}
Email:            {metadata.get('vendor_email', 'TBD')}
GST/Tax ID:       {metadata.get('vendor_gst', 'TBD')}


BUYER INFORMATION:
{'='*80}
Company Name:     {metadata.get('company_name', 'Your Company Ltd.')}
Department:       {metadata.get('department', 'Procurement')}
Address:          {metadata.get('buyer_address', 'TBD')}
City/State/ZIP:   {metadata.get('buyer_location', 'TBD')}
Contact Person:   {metadata.get('buyer_contact', metadata.get('author', 'TBD'))}
Phone:            {metadata.get('buyer_phone', 'TBD')}
Email:            {metadata.get('buyer_email', 'TBD')}


PURCHASE ORDER SUMMARY:
{'='*80}

Based on Audio Discussion:
{summary_text}


DETAILED LINE ITEMS:
{'='*80}

"""
        
        # Extract items from deliverables or requirements
        items = structured_info['deliverables'] if structured_info['deliverables'] else structured_info['requirements']
        
        doc += f"{'Item':<5} {'Description':<45} {'Qty':<8} {'Unit':<10} {'Price':<12} {'Total':<12}\n"
        doc += "-" * 100 + "\n"
        
        if items:
            for idx, item in enumerate(items[:15], 1):  # Max 15 items
                clean_item = item.replace('\n', ' ')[:42]
                doc += f"{idx:<5} {clean_item:<45} {'TBD':<8} {'Each':<10} {'TBD':<12} {'TBD':<12}\n"
        else:
            doc += f"{'1':<5} {'Items/Services as per audio discussion':<45} {'TBD':<8} {'Each':<10} {'TBD':<12} {'TBD':<12}\n"
        
        doc += "\n"
        
        doc += f"""

COST BREAKDOWN:
{'='*80}

"""
        
        if structured_info['budget']:
            doc += "Cost Details (from audio discussion):\n\n"
            for budget_item in structured_info['budget']:
                doc += f"‚Ä¢ {budget_item}\n"
            doc += "\n"
        
        doc += f"""
Subtotal:                                                    {metadata.get('subtotal', 'TBD')}
Discount (if any):                                           {metadata.get('discount', '0.00')}
                                                             ___________
Subtotal after Discount:                                     {metadata.get('subtotal_after_discount', 'TBD')}

Tax/GST ({metadata.get('tax_rate', '18')}%):                                             {metadata.get('tax_amount', 'TBD')}
Shipping & Handling:                                         {metadata.get('shipping', 'TBD')}
Other Charges:                                               {metadata.get('other_charges', '0.00')}
                                                             ___________
TOTAL AMOUNT:                                                {metadata.get('total_amount', 'TBD')}
                                                             ===========


TERMS & CONDITIONS:
{'='*80}

Payment Terms:         {metadata.get('payment_terms', 'Net 30 Days')}
Delivery Terms:        {metadata.get('delivery_terms', 'FOB Destination')}
Expected Delivery:     {metadata.get('delivery_date', 'TBD - As per discussion')}
Delivery Address:      {metadata.get('delivery_address', 'As per buyer information above')}
Shipping Method:       {metadata.get('shipping_method', 'Standard')}
Warranty:              {metadata.get('warranty', 'As per vendor terms')}
Return Policy:         {metadata.get('return_policy', 'As per vendor terms')}


PAYMENT SCHEDULE:
{'='*80}

"""
        
        if metadata.get('payment_schedule'):
            doc += metadata['payment_schedule']
        else:
            doc += f"""
‚Ä¢ Advance Payment: {metadata.get('advance_payment', '0%')} on PO confirmation
‚Ä¢ Balance Payment: {metadata.get('balance_payment', '100%')} {metadata.get('payment_terms', 'Net 30')}
"""
        
        doc += f"""

SPECIAL INSTRUCTIONS:
{'='*80}

"""
        
        if structured_info['requirements']:
            doc += "Requirements from audio discussion:\n\n"
            for req in structured_info['requirements'][:5]:
                doc += f"‚Ä¢ {req}\n"
        else:
            doc += "As per audio discussion and mutual agreement.\n"
        
        doc += f"""

ADDITIONAL NOTES:
{'='*80}

"""
        
        if structured_info['action_items']:
            doc += "Action Items:\n\n"
            for action in structured_info['action_items'][:5]:
                doc += f"‚Ä¢ {action}\n"
        
        doc += f"""

VALIDITY:
{'='*80}

This Purchase Order is valid until: {metadata.get('validity_date', 'TBD')}


APPROVAL & AUTHORIZATION:
{'='*80}

Requested By:

Name:      {metadata.get('requested_by', 'TBD')}
Title:     {metadata.get('requested_title', 'TBD')}
Date:      {metadata.get('date', 'TBD')}
Signature: _____________________


Approved By:

Name:      {metadata.get('approved_by', 'TBD')}
Title:     {metadata.get('approved_title', 'Manager/Director')}
Date:      ___________
Signature: _____________________


Finance Approval:

Name:      {metadata.get('finance_approval', 'TBD')}
Title:     Finance Manager
Date:      ___________
Signature: _____________________


VENDOR ACCEPTANCE:
{'='*80}

We accept the terms and conditions of this Purchase Order:

Vendor Name:    {metadata.get('vendor_name', 'TBD')}
Authorized By:  _____________________
Title:          _____________________
Date:           ___________
Signature:      _____________________
Company Seal:   


{'='*80}
Purchase Order Generated from Audio Analysis
System: Whisper Large + FLAN-T5 Large
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
{'='*80}

IMPORTANT NOTES:
- This is a preliminary document extracted from audio discussion
- Please review and verify all details before finalization
- TBD items must be filled in before final approval
- Consult legal/procurement team for compliance review
"""
        
        return doc

    def process_audio_smart(
        self,
        audio_path,
        strategy,
        quality,
        custom_instruction=None,
        save_output=True,
        output_filename=None
    ):
        """
        Complete smart pipeline with T5 adaptive summarization
        
        Args:
            audio_path: Path to audio file
            strategy: 'ultra_concise', 'concise', 'balanced', 'detailed', 'comprehensive'
            quality: 'fast', 'medium', 'high', 'best'
            custom_instruction: Optional instruction for FLAN-T5
                               e.g., "Summarize the key business points"
            save_output: Save results to file
        """
        
        
        print("="*70)
        print("üéØ SMART T5 AUDIO SUMMARIZER")
        print("="*70 + "\n")
        
        
        # Step 1: Transcribe
        transcription = self.transcribe_audio(audio_path)
        word_count = transcription['word_count']
        
        # Step 2: Calculate smart summary length
        print(f"üß† Calculating adaptive summary length...")
        summary_config = self.calculate_adaptive_summary_length(word_count, strategy)
        
        print(f"üìä Strategy: {summary_config['strategy'].upper()}")
        print(f"üìù Description: {summary_config['description']}")
        print(f"üìè Input: {word_count} words")
        print(f"üìè Target: {summary_config['min_words']}-{summary_config['max_words']} words")
        print(f"üìâ Compression: {summary_config['ratio']*100:.0f}%")
        print(f"‚ö° Quality: {quality.upper()}\n")
        
        if custom_instruction:
            print(f"üí¨ Custom Instruction: {custom_instruction}\n")
        
        # Step 3: Handle very short text
        if word_count < 25:
            print("‚ö†Ô∏è Text very short (<25 words) - returning full transcription\n")
            summary = transcription['text']
            summary_words = word_count
        
        # Step 4: Summarize with T5
        else:
            print(f"üìä Generating T5 summary (process_audio_smart)...")
            
            # For long texts, use chunking
            if word_count > 400:
                summary = self._summarize_long_text(
                    transcription['text'],
                    summary_config,
                    quality,
                    custom_instruction
                )
            else:
                summary = self.generate_t5_summary(
                    transcription['text'],
                    max_length=summary_config['max_length'],
                    min_length=summary_config['min_length'],
                    quality=quality,
                    custom_instruction=custom_instruction
                )
            
            summary_words = len(summary.split())
            print(f"‚úÖ Summary generated! ({summary_words} words)\n")
        
        # Prepare results
        results = {
            'audio_file': os.path.basename(audio_path),
            'language': transcription['language_name'],
            'transcription': transcription['text'],
            'summary': summary,
            'input_words': word_count,
            'summary_words': summary_words,
            'compression_ratio': (1 - summary_words/word_count) * 100 if word_count > 0 else 0,
            'strategy': strategy,
            'quality': quality,
            'config': summary_config,
            'custom_instruction': custom_instruction
        }

    # Display results
        self._display_results(results)
    
    # Save results
        if save_output:
            self._save_results(results, custom_filename=output_filename)
    
    # ‚úÖ ADD THIS: Return the results!
        return results

    def process_audio_to_document(
        self,
        audio_path,
        summary_text,
        document_type='brd',
        custom_instruction=None,
        metadata=None,
        save_output=True,
        output_filename=None
    ):
        """
        Build a formatted business document from an already-generated summary.

        Transcription and summarization are NOT performed here: the caller
        supplies the summary text (for example the 'summary' entry of the
        dict returned by process_audio_smart).

        Args:
            audio_path: Path to the source audio file. Only its base name is
                used, as the default 'project_name' metadata value.
            summary_text: Summary text the document is built from.
            document_type: 'brd' or 'purchase_order'.
            custom_instruction: Accepted for backward compatibility; unused
                because no summarization happens in this method.
            metadata: Optional dict of document fields. It is copied, so the
                caller's dict is never modified.
            save_output: When True (default), write the document to disk.
            output_filename: Optional file name for the saved document. A
                relative name is placed under /kaggle/working; an absolute
                path is used as given. Defaults to
                "<document_type>_<project_name>.txt".

        Returns:
            dict with 'structured_info', 'formatted_document' and
            'output_file' (None when save_output is False).

        Raises:
            ValueError: If document_type is not 'brd' or 'purchase_order'.
        """

        print("="*70)
        print(f"AUDIO TO {document_type.upper()} CONVERTER")
        print("="*70 + "\n")

        # Step 3: Extract structured information
        print("STEP 3: Extracting structured information...")
        structured_info = self.extract_structured_info(summary_text)

        # Step 4: Generate document
        print(f"STEP 4: Generating {document_type.upper()}...\n")

        # Work on a copy so the defaults added below never leak into the
        # caller's dict (which may be reused for another document).
        metadata = dict(metadata) if metadata else {}

        # splitext keeps dotted names intact ("team.sync.m4a" -> "team.sync").
        default_project = os.path.splitext(os.path.basename(audio_path))[0]
        metadata.setdefault('project_name', default_project)
        metadata.setdefault('date', datetime.now().strftime('%Y-%m-%d'))

        if document_type == 'brd':
            formatted_doc = self.generate_brd(summary_text, structured_info, metadata)
        elif document_type == 'purchase_order':
            formatted_doc = self.generate_purchase_order(summary_text, structured_info, metadata)
        else:
            raise ValueError(f"Unknown document type: {document_type}")

        # Step 5: Save (only when requested, honoring the caller's file name)
        saved_path = None
        if save_output:
            default_name = f"{document_type}_{metadata['project_name']}.txt"
            # os.path.join keeps an absolute output_filename unchanged and
            # places a relative one under /kaggle/working.
            saved_path = os.path.join("/kaggle/working", output_filename or default_name)
            with open(saved_path, 'w', encoding='utf-8') as f:
                f.write(formatted_doc)

            print(f"‚úÖ {document_type.upper()} generated and saved!")
            print(f"üìÅ File: {saved_path}\n")
        else:
            print(f"{document_type.upper()} generated (save_output=False, nothing written to disk).\n")

        return {
            'structured_info': structured_info,
            'formatted_document': formatted_doc,
            'output_file': saved_path
        }
        
    
    def _summarize_long_text(self, text, summary_config, quality, custom_instruction):
        """Handle long texts with intelligent chunking"""
        # T5 handles ~400 words well per chunk
        chunk_size = 400  # words
        words = text.split()
        chunks = [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
        
        print(f"  üìÑ Processing {len(chunks)} chunk(s)...")
        
        chunk_summaries = []
        for idx, chunk in enumerate(chunks):
            chunk_words = len(chunk.split())
            
            if chunk_words < 25:
                continue
            
            # Adaptive length per chunk
            chunk_config = self.calculate_adaptive_summary_length(
                chunk_words,
                summary_config['strategy']
            )
            
            print(f"    ‚û§ Chunk {idx+1}/{len(chunks)} ({chunk_words} words)...", end=" ")
            
            try:
                chunk_summary = self.generate_t5_summary(
                    chunk,
                    max_length=chunk_config['max_length'],
                    min_length=chunk_config['min_length'],
                    quality=quality,
                    custom_instruction=custom_instruction
                )
                chunk_summaries.append(chunk_summary)
                print("‚úì")
            except Exception as e:
                print(f"‚úó (Error: {str(e)[:30]})")
                continue
        
        if not chunk_summaries:
            return text[:500]
        
        combined = ' '.join(chunk_summaries)
        combined_words = len(combined.split())
        
        # Final summary if still too long
        if len(chunks) > 1 and combined_words > summary_config['max_words']:
            print(f"  ‚û§ Creating final summary ({combined_words} words)...", end=" ")
            try:
                final = self.generate_t5_summary(
                    combined,
                    max_length=summary_config['max_length'],
                    min_length=summary_config['min_length'],
                    quality=quality,
                    custom_instruction=custom_instruction
                )
                print("‚úì")
                return final
            except:
                print("‚úó")
        
        return combined
    
    def _display_results(self, results):
        """Display formatted results"""
        print("="*70)
        print("üìã RESULTS")
        print("="*70)
        print(f"üìÅ File: {results['audio_file']}")
        print(f"üåç Language: {results['language']}")
        print(f"üìä strategy: {results['strategy'].upper()}")
        print(f"‚ö° Quality: {results['quality'].upper()}")
        if results.get('custom_instruction'):
            print(f"üí¨ Instruction: {results['custom_instruction']}")
        print(f"üìè Original: {results['input_words']} words")
        print(f"üìè Summary: {results['summary_words']} words")
        print(f"üìâ Compression: {results['compression_ratio']:.1f}%")
        
        print("\n" + "="*70)
        print("‚ú® T5 ABSTRACTIVE SUMMARY:")
        print("="*70)
        print(results['summary'])
        
        print("\n" + "="*70)
        print("üìÑ FULL TRANSCRIPTION (first 400 chars):")
        print("="*70)
        print(results['transcription'][:400] + "...")
        print("="*70 + "\n")
    
    def _save_results(self, results, custom_filename=None):
        """Save to file with optional custom filename"""

        # Use custom filename if provided, otherwise use default
        if custom_filename:
            output_file = f"/kaggle/working/{custom_filename}"
        else:
            output_file = "/kaggle/working/t5_best_smart_summary.txt"
        
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("="*70 + "\n")
            f.write("SMART T5 AUDIO SUMMARY REPORT\n")
            f.write("="*70 + "\n\n")
            f.write(f"Audio File: {results['audio_file']}\n")
            f.write(f"Language: {results['language']}\n")
            #f.write(f"Models: {results['models']}\n")
            f.write(f"Strategy: {results['strategy']}\n")
            f.write(f"Quality: {results['quality']}\n")
            if results.get('custom_instruction'):
                f.write(f"Custom Instruction: {results['custom_instruction']}\n")
            f.write(f"Original Words: {results['input_words']}\n")
            f.write(f"Summary Words: {results['summary_words']}\n")
            f.write(f"Compression: {results['compression_ratio']:.1f}%\n\n")
            f.write("="*70 + "\n")
            f.write("T5 ABSTRACTIVE SUMMARY:\n")
            f.write("="*70 + "\n\n")
            f.write(results['summary'] + "\n\n")
            f.write("="*70 + "\n")
            f.write("FULL TRANSCRIPTION:\n")
            f.write("="*70 + "\n\n")
            f.write(results['transcription'] + "\n")
        
        print(f"üíæ Saved to: {output_file}\n")

        """
        Initialize with T5 model
        
        Args:
            whisper_model: 'tiny', 'base', 'small', 'medium', 'large'
            t5_model: Choose from:
                - 't5-small' (Fast, 60M params, good for short audio)
                - 't5-base' (Balanced, 220M params, recommended)
                - 't5-large' (Best quality, 770M params, slower)
                - 't5-3b' (Highest quality, 3B params, very slow)
                - 'google/flan-t5-base' (Instruction-tuned, excellent)
                - 'google/flan-t5-large' (Best instruction-following)

            Args:
            word_count: Number of words in transcription
            strategy: 'ultra_concise', 'concise', 'balanced', 'detailed', 'comprehensive'
        
        """
# Sanity check: this line is only reached if the class statement above ran without raising.
print("\n\n‚úÖ SmartT5LargeDocumentGenerator class defined successfully!")

2026-01-19 12:08:11.387487: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768824491.786236      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768824491.893366      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768824492.691637      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768824492.691678      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768824492.691681      55 computation_placer.cc:177] computation placer alr



‚úÖ SmartT5LargeDocumentGenerator class defined successfully!


In [3]:
# ============================================================================
# USAGE EXAMPLES
# ============================================================================

import gc  # imported before its first use in this cell

# Clear memory
torch.cuda.empty_cache()
gc.collect()

if __name__ == "__main__":

    # ========================================================================
    # STEP 1: Load Models ONCE
    # ========================================================================

    # Single source of truth for the run settings: the same values configure
    # the calls below AND name the output files, so the two cannot drift apart.
    t5_model = "google/flan-t5-large"  # Instruction-tuned T5
    strategy = "comprehensive"
    quality = "best"

    print("\n" + "="*70)
    print("INITIALIZING T5 LARGE DOCUMENT GENERATOR")
    print("="*70 + "\n")

    summarizer = SmartT5LargeDocumentGenerator(
        whisper_model="large",
        t5_model=t5_model
    )

    # ========================================================================
    # STEP 2: Audio --- Summary
    # ========================================================================

    audio_path = "/kaggle/input/eng-hinbi-marathi-mix-audio/Aao na.m4a"

    # Build a unique file name from the model, strategy and quality in use.
    # '/' in the model id is replaced so the name stays a single path component.
    safe_model_name = t5_model.replace('/', '_')
    dynamic_output_filename = f"Summary_{safe_model_name}_{strategy}_{quality}-random-meeting.txt"

    results = summarizer.process_audio_smart(
        audio_path=audio_path,
        strategy=strategy,
        quality=quality,
        save_output=True,
        output_filename=dynamic_output_filename
    )

    print(f"Saving output to: /kaggle/working/{dynamic_output_filename}")

    # ========================================================================
    # EXAMPLE 1: Generate BRD from the summary
    # ========================================================================

    print("\n" + "="*70)
    print("EXAMPLE 1: GENERATE BRD FROM AUDIO")
    print("="*70 + "\n")

    brd_output_filename = f"Summary_{safe_model_name}_{strategy}_{quality}_brd-random-meeting.txt"

    # NOTE: process_audio_to_document does not summarize, so the
    # custom_instruction passed here is currently ignored by that method.
    brd_results = summarizer.process_audio_to_document(
        audio_path=audio_path,
        summary_text=results['summary'],
        document_type='brd',
        custom_instruction="Extract all business requirements, decisions, timeline, and stakeholder information",
        metadata={
            'project_name': 'Mobile_App_Redesign',
            'version': '1.0',
            'status': 'Draft',
            'author': 'Business Analysis Team',
            'department': 'Product Development',
            'sponsor': 'VP of Product',
            'priority': 'High'
        },
        save_output=True,
        output_filename=brd_output_filename
    )

    # ========================================================================
    # EXAMPLE 2: Generate Purchase Order from the summary
    # ========================================================================

    print("\n" + "="*70)
    print("EXAMPLE 2: GENERATE PURCHASE ORDER FROM AUDIO")
    print("="*70 + "\n")

    po_output_filename = f"Summary_{safe_model_name}_{strategy}_{quality}_po-random-meeting.txt"

    po_results = summarizer.process_audio_to_document(
        audio_path=audio_path,
        summary_text=results['summary'],
        document_type='purchase_order',
        custom_instruction="Extract vendor details, items to be purchased, quantities, costs, and delivery terms",
        metadata={
            'po_number': 'PO-2024-001',
            'vendor_name': 'ABC Technology Solutions Pvt Ltd',
            'vendor_address': '123 Tech Park, Bangalore',
            'vendor_contact': 'Mr. Rajesh Kumar',
            'vendor_phone': '+91 98765 43210',
            'vendor_email': 'rajesh@abctech.com',
            'vendor_gst': '29ABCDE1234F1Z5',
            'company_name': 'XYZ Enterprises Ltd',
            'department': 'IT Procurement',
            'payment_terms': 'Net 30 Days',
            'delivery_date': '2024-02-15',
            'shipping_method': 'Express Delivery',
            'tax_rate': '18'
        },
        save_output=True,
        output_filename=po_output_filename
    )

    # Clear memory after processing
    del results
    torch.cuda.empty_cache()
    gc.collect()
    
    



INITIALIZING T5 LARGE DOCUMENT GENERATOR

üîß Device: cuda
üöÄ GPU: Tesla T4
üíæ GPU Memory: 15.83 GB

üì• Loading Whisper 'large'...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2.88G/2.88G [00:26<00:00, 117MiB/s]


‚úÖ Whisper Large loaded!

üì• Loading T5 'google/flan-t5-large'...


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

‚úÖ T5 loaded!

‚ú® Smart T5 Audio Summarizer Ready!

üéØ SMART T5 AUDIO SUMMARIZER

üéµ Audio: Mumma xray review BH neurosurgeon.m4a (2.01 MB)
‚è≥ Transcribing...

Detected language: Hindi


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 24925/24925 [02:40<00:00, 155.51frames/s]


‚úÖ Transcription complete!
üåç Language: Hindi (‡§π‡§ø‡§®‡•ç‡§¶‡•Ä)
üìù Words: 1092
üß† Calculating adaptive summary length...
üìä Strategy: COMPREHENSIVE
üìù Description: Extensive detail
üìè Input: 1092 words
üìè Target: 262-450 words
üìâ Compression: 57%
‚ö° Quality: BEST

üìä Generating T5 summary (process_audio_smart)...
  üìÑ Processing 3 chunk(s)...
    ‚û§ Chunk 1/3 (400 words)... ‚úì
    ‚û§ Chunk 2/3 (400 words)... ‚úì
    ‚û§ Chunk 3/3 (292 words)... ‚úì
‚úÖ Summary generated! (337 words)

üìã RESULTS
üìÅ File: Mumma xray review BH neurosurgeon.m4a
üåç Language: Hindi (‡§π‡§ø‡§®‡•ç‡§¶‡•Ä)
üìä strategy: COMPREHENSIVE
‚ö° Quality: BEST
üìè Original: 1092 words
üìè Summary: 337 words
üìâ Compression: 69.1%

‚ú® T5 ABSTRACTIVE SUMMARY:
summarize: We had come here on Tuesday and the doctor had referred us for X-ray and this is today's baby. This was from that day and these are her medicines. This is the Prega and Medcoba which is a combination of medicines. Yes,

# CUSTOM INSTRUCTION EXAMPLES FOR FLAN-T5
# ============================================================================
# ============================================================================
"""
💡 CUSTOM INSTRUCTION IDEAS (for FLAN-T5):

General:
- "Summarize the main points"
- "Provide a brief overview"
- "Extract the key information"

Business:
- "Summarize the key business decisions and action items"
- "List the main discussion points from this meeting"
- "What are the important takeaways for stakeholders?"

Educational:
- "Summarize the main concepts taught in this lecture"
- "What are the key learning objectives?"
- "Provide a student-friendly summary"

Technical:
- "Summarize the technical approach and methodology"
- "What are the main technical challenges discussed?"
- "Extract the implementation details"

News/Media:
- "Summarize who, what, when, where, and why"
- "What is the main story and its impact?"
- "Provide a headline-style summary"
"""


# ============================================================================
# T5 MODEL QUICK REFERENCE
# ============================================================================

"""
📚 T5 MODEL GUIDE:

1. t5-small (60M params)
   - Fastest
   - Good for short audio (<5 min)
   - Lower quality
   - Best for: Quick tests, resource-limited

2. t5-base (220M params) ← RECOMMENDED
   - Balanced speed/quality
   - Works for any audio length
   - Best general-purpose choice
   - Best for: Most use cases

3. t5-large (770M params)
   - High quality
   - Slower
   - Requires more GPU memory
   - Best for: Quality-critical tasks

4. google/flan-t5-base (220M params) ← BEST FOR INSTRUCTIONS
   - Instruction-tuned version
   - Works with custom instructions
   - Better understanding of context
   - Best for: Specific summarization goals

5. google/flan-t5-large (770M params)
   - Best quality with instructions
   - Excellent context understanding
   - Slower, needs good GPU
   - Best for: Professional applications

⚡ SPEED COMPARISON (relative inference time; higher = slower):
t5-small: 1x
t5-base: 2.5x
t5-large: 8x
flan-t5-base: 2.5x
flan-t5-large: 8x

💾 MEMORY USAGE:
t5-small: ~300 MB
t5-base: ~900 MB
t5-large: ~3 GB
flan-t5-base: ~900 MB
flan-t5-large: ~3 GB
"""

Summary to BRD / PO

How It Works

Audio → Transcription (Whisper)
Transcription → Summary (T5)
Summary → Structured Sections (NLP extraction)
Sections → Formatted Document (Template formatting)

The formatter automatically extracts:

Requirements (words: require, need, must, should)
Decisions (words: decide, agreed, approved)
Action Items (words: will, task, assign, responsible)
Timeline (words: deadline, date, week, month)
Budget (words: cost, budget, price, payment)
Risks (words: risk, concern, issue, challenge)
Technical (words: technical, system, platform, API)

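Tips for Best Results

Use the comprehensive or detailed strategy when the summary will feed a document.
Provide metadata for professional formatting.
Process the audio once, then generate several document formats (e.g. BRD and purchase order) from the same summary.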

In [4]:
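# Reference copy (originally kept in a markdown cell): full audio -> BRD / PO pipeline.
# Modules from this version will be merged into the code above and saved as version 3.
# NOTE: running this cell redefines SmartT5LargeDocumentGenerator, replacing the class defined earlier.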

""" Smart T5 Large Audio to Professional Documents Converter
Uses: Whisper Large + FLAN-T5-Large
Outputs: BRD, Purchase Order, and other business documents
Optimized for Kaggle with GPU acceleration
"""

import whisper
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import os
import re
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# INSTALLATION (Run in Kaggle first cell):
# !pip install -q openai-whisper transformers sentencepiece accelerate
# ============================================================================


class SmartT5LargeDocumentGenerator:
    """
    Complete pipeline: Audio ‚Üí T5 Large Summary ‚Üí Professional Documents
    Handles: BRD, Purchase Orders, Meeting Minutes, Technical Specs
    """
    
    def __init__(self, whisper_model="large", t5_model="google/flan-t5-large"):
        """
        Load the Whisper and T5 models onto the best available device.

        Args:
            whisper_model: Checkpoint name passed to whisper.load_model
                (default 'large').
            t5_model: Model id passed to the T5 tokenizer and model loaders
                (default 'google/flan-t5-large').
        """
        on_gpu = torch.cuda.is_available()
        self.device = "cuda" if on_gpu else "cpu"

        if not on_gpu:
            print("‚ö†Ô∏è  WARNING: Running on CPU will be VERY SLOW!")
            print("   Enable GPU in Kaggle: Settings ‚Üí Accelerator ‚Üí GPU T4 x2\n")

        print(f"üîß Device: {self.device}")
        if on_gpu:
            print(f"üöÄ GPU: {torch.cuda.get_device_name(0)}")
            total_bytes = torch.cuda.get_device_properties(0).total_memory
            gpu_memory = total_bytes / 1e9
            print(f"üíæ GPU Memory: {gpu_memory:.2f} GB")

        # Speech-to-text model
        print("\nüì• Loading Whisper Large...")
        self.whisper_model = whisper.load_model(whisper_model, device=self.device)
        print("‚úÖ Whisper Large loaded!")

        # Summarization tokenizer + model
        print("\nüì• Loading FLAN-T5 Large...")
        self.tokenizer = T5Tokenizer.from_pretrained(t5_model, legacy=False)

        if not on_gpu:
            # CPU: full precision, moved to the device explicitly.
            cpu_model = T5ForConditionalGeneration.from_pretrained(
                t5_model,
                torch_dtype=torch.float32
            )
            self.model = cpu_model.to(self.device)
        else:
            # GPU: half precision, placement delegated to device_map="auto".
            self.model = T5ForConditionalGeneration.from_pretrained(
                t5_model,
                torch_dtype=torch.float16,
                device_map="auto"
            )

        print("‚úÖ FLAN-T5 Large loaded!")

        rule = "="*70
        print("\n" + rule)
        print("‚ú® Smart T5 Large Document Generator Ready!")
        print(rule + "\n")
    
    def transcribe_audio(self, audio_path):
        """
        Run the loaded Whisper model on an audio file with task='translate'.

        Args:
            audio_path: Path to the audio file.

        Returns:
            dict with 'text' (stripped output text), 'language' (display
            name for 'hi'/'en'/'mr', otherwise the raw detected code) and
            'word_count' (whitespace-separated words in 'text').

        Raises:
            FileNotFoundError: If audio_path does not exist.
        """
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"‚ùå File not found: {audio_path}")

        size_mb = os.path.getsize(audio_path) / (1024 * 1024)
        print(f"üéµ Audio: {os.path.basename(audio_path)} ({size_mb:.2f} MB)")
        print("‚è≥ Transcribing with Whisper Large...\n")

        decode_options = {
            'task': 'translate',
            'language': None,
            'fp16': self.device == "cuda",  # half precision only on GPU
            'verbose': False,
            'beam_size': 5,
            'best_of': 5,
            'temperature': 0.0,
        }
        result = self.whisper_model.transcribe(audio_path, **decode_options)

        language_names = {
            'hi': 'Hindi (‡§π‡§ø‡§®‡•ç‡§¶‡•Ä)',
            'en': 'English',
            'mr': 'Marathi (‡§Æ‡§∞‡§æ‡§†‡•Ä)'
        }

        code = result.get('language', 'unknown')
        # Unknown codes are shown as-is.
        language_label = language_names.get(code, code)
        transcript = result['text'].strip()
        n_words = len(transcript.split())

        print("‚úÖ Transcription complete!")
        print(f"üåç Language: {language_label}")
        print(f"üìù Words: {n_words}\n")

        return {
            'text': transcript,
            'language': language_label,
            'word_count': n_words
        }
    
    def generate_summary(self, text, custom_instruction=None, max_length=512):
        """
        Summarize ``text`` with the loaded T5 model using beam search.

        The prompt is "<instruction>: <text>", where the instruction is
        ``custom_instruction`` when given and a built-in comprehensive
        summary instruction otherwise. The tokenized prompt is truncated to
        512 tokens.

        Args:
            text: Text to summarize.
            custom_instruction: Optional instruction that replaces the
                default one.
            max_length: max_length passed to generate; min_length is set to
                a quarter of it.

        Returns:
            str: The decoded summary.
        """
        print("üìä Generating high-quality summary with FLAN-T5 Large...")

        # Prepare instruction
        default_instruction = (
            "Provide a comprehensive summary including key points, decisions, "
            "requirements, and action items"
        )
        instruction = custom_instruction if custom_instruction else default_instruction
        prompt = f"{instruction}: {text}"

        # Tokenize
        encoded = self.tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True
        )
        encoded = encoded.to(self.device)

        # Generate
        generation_options = {
            'max_length': max_length,
            'min_length': max_length // 4,
            'num_beams': 8,
            'length_penalty': 1.8,
            'early_stopping': True,
            'no_repeat_ngram_size': 4,
            'repetition_penalty': 1.3,
            'temperature': 1.0,
            'do_sample': False,
        }
        with torch.no_grad():
            generated = self.model.generate(encoded["input_ids"], **generation_options)

        summary = self.tokenizer.decode(
            generated[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True
        )

        produced_words = len(summary.split())
        print(f"‚úÖ Summary generated: {produced_words} words\n")

        return summary
    
    def extract_structured_info(self, summary_text):
        """
        Sort the sentences of a summary into document categories by keyword.

        A sentence is added to a category when one of that category's
        keywords starts a word in the lower-cased sentence, so 'require'
        matches "required" and "requirements" while 'rs' no longer matches
        "users" and 'date' no longer matches "update". A sentence may land
        in several categories.

        Args:
            summary_text: Summary text; None or empty yields empty lists.

        Returns:
            dict mapping 'requirements', 'decisions', 'action_items',
            'timeline', 'budget', 'risks', 'technical', 'deliverables' and
            'stakeholders' to lists of matching sentences (whitespace
            stripped, terminal punctuation removed, original order kept).
        """
        category_keywords = {
            'requirements': ['require', 'need', 'must', 'should', 'shall', 'expect'],
            'decisions': ['decide', 'agreed', 'approved', 'confirmed', 'finalized'],
            'action_items': ['will', 'task', 'action', 'assign', 'responsible', 'owner'],
            'timeline': ['deadline', 'timeline', 'date', 'week', 'month', 'schedule', 'due'],
            'budget': ['cost', 'budget', 'price', 'payment', 'fund', 'expense', '$', 'rs', 'rupee', 'inr'],
            'risks': ['risk', 'concern', 'issue', 'challenge', 'problem', 'blocker'],
            'technical': ['technical', 'technology', 'system', 'platform', 'api', 'database', 'infrastructure'],
            'deliverables': ['deliver', 'output', 'product', 'feature', 'component', 'milestone'],
            'stakeholders': ['stakeholder', 'team', 'department', 'client', 'customer', 'vendor'],
        }

        def compile_keywords(keywords):
            # Word keywords must start a word (\b); symbols such as '$' have
            # no word boundary in front of them, so they match anywhere.
            alternatives = []
            for keyword in keywords:
                escaped = re.escape(keyword)
                alternatives.append(r'\b' + escaped if keyword[0].isalnum() else escaped)
            return re.compile('|'.join(alternatives))

        patterns = {name: compile_keywords(words) for name, words in category_keywords.items()}
        info = {name: [] for name in category_keywords}

        # Split only on punctuation followed by whitespace or end of text, so
        # decimals such as "1.5" stay inside their sentence.
        for fragment in re.split(r'[.!?]+(?=\s|$)', summary_text or ''):
            sentence = fragment.strip()
            if not sentence:
                continue

            lower = sentence.lower()
            for name, pattern in patterns.items():
                if pattern.search(lower):
                    info[name].append(sentence)

        return info
    
    def generate_brd(self, summary_text, structured_info, metadata):
        """
        Generate a Business Requirements Document (BRD) as plain text.

        The document consists of a header block, 13 numbered sections and a
        footer carrying the generation timestamp. Sections fed from
        ``structured_info`` fall back to placeholder text (or to ``metadata``
        values that default to 'TBD') when their list is empty.

        Args:
            summary_text: Text inserted verbatim as the executive summary.
            structured_info: Dict of sentence lists; the keys read here are
                'requirements', 'technical', 'stakeholders', 'decisions',
                'deliverables', 'timeline', 'budget' and 'risks'.
            metadata: Dict of optional document fields, always read with
                ``.get`` and a default (e.g. 'project_name', 'date',
                'version', 'author', 'status', 'department', 'sponsor',
                'priority').

        Returns:
            str: The complete BRD text.
        """

        # Header block, section 1 (executive summary) and the intro of section 2.
        doc = f"""
{'='*80}
BUSINESS REQUIREMENTS DOCUMENT (BRD)
{'='*80}

Document Information:
--------------------
Project Name:     {metadata.get('project_name', 'Audio Extracted Project')}
Document Date:    {metadata.get('date', datetime.now().strftime('%Y-%m-%d'))}
Version:          {metadata.get('version', '1.0')}
Prepared By:      {metadata.get('author', 'T5 Large Audio Analysis System')}
Status:           {metadata.get('status', 'Draft - Extracted from Audio')}
Department:       {metadata.get('department', 'TBD')}
Sponsor:          {metadata.get('sponsor', 'TBD')}


1. EXECUTIVE SUMMARY
{'='*80}

{summary_text}


2. BUSINESS OBJECTIVES
{'='*80}

Based on the audio discussion, the key business objectives are:

"""

        # Section 2: the first five requirement sentences double as objectives.
        if structured_info['requirements']:
            for idx, req in enumerate(structured_info['requirements'][:5], 1):
                doc += f"OBJ-{idx}: {req}\n"
        else:
            doc += "Business objectives to be refined based on stakeholder review.\n"

        doc += f"""

3. BUSINESS REQUIREMENTS
{'='*80}

"""

        # Section 3: every requirement sentence, numbered BR-001, BR-002, ...
        # All entries share the single 'priority' value from metadata.
        if structured_info['requirements']:
            for idx, req in enumerate(structured_info['requirements'], 1):
                doc += f"BR-{idx:03d}: {req}\n"
                doc += f"         Priority: {metadata.get('priority', 'Medium')}\n"
                doc += f"         Status: New\n"
                doc += f"         Source: Audio Discussion\n\n"
        else:
            doc += "Business requirements extracted from executive summary above.\n"

        doc += f"""

4. FUNCTIONAL REQUIREMENTS
{'='*80}

"""

        # Section 4: functional requirements are taken from the 'technical' sentences.
        if structured_info['technical']:
            for idx, tech in enumerate(structured_info['technical'], 1):
                doc += f"FR-{idx:03d}: {tech}\n"
                doc += f"         Category: {metadata.get('category', 'Technical')}\n"
                doc += f"         Priority: {metadata.get('priority', 'Medium')}\n\n"
        else:
            doc += "Functional requirements to be detailed in technical specification.\n"

        doc += f"""

5. STAKEHOLDERS
{'='*80}

"""

        # Section 5: stakeholder sentences, or a metadata-driven placeholder list.
        if structured_info['stakeholders']:
            doc += "Stakeholders identified in discussion:\n\n"
            for stakeholder in structured_info['stakeholders']:
                doc += f"‚Ä¢ {stakeholder}\n"
        else:
            doc += f"""
Primary Stakeholders:
‚Ä¢ Project Sponsor: {metadata.get('sponsor', 'TBD')}
‚Ä¢ Business Owner: {metadata.get('business_owner', 'TBD')}
‚Ä¢ Project Manager: {metadata.get('pm', 'TBD')}
‚Ä¢ End Users: {metadata.get('end_users', 'As discussed in audio')}
"""

        doc += f"""

6. KEY DECISIONS
{'='*80}

"""

        # Section 6: each decision is stamped with the document date from metadata.
        if structured_info['decisions']:
            for idx, decision in enumerate(structured_info['decisions'], 1):
                doc += f"D{idx}. {decision}\n"
                doc += f"    Date: {metadata.get('date', 'TBD')}\n"
                doc += f"    Decision Maker: {metadata.get('decision_maker', 'TBD')}\n\n"
        else:
            doc += "Key decisions documented in executive summary.\n"

        doc += f"""

7. SCOPE
{'='*80}

In Scope:
"""

        # Section 7: in-scope items come from the 'deliverables' sentences.
        if structured_info['deliverables']:
            for deliverable in structured_info['deliverables']:
                doc += f"‚Ä¢ {deliverable}\n"
        else:
            doc += "‚Ä¢ As defined in requirements above\n"

        # The out-of-scope list is fixed boilerplate (plain string, no placeholders).
        doc += """

Out of Scope:
‚Ä¢ Items not mentioned in the audio discussion
‚Ä¢ Features to be considered for future phases

"""

        doc += f"""

8. TIMELINE & MILESTONES
{'='*80}

"""

        # Section 8: timeline sentences, or a phase list filled from metadata.
        if structured_info['timeline']:
            for milestone in structured_info['timeline']:
                doc += f"‚Ä¢ {milestone}\n"
        else:
            doc += f"""
Project Timeline:
‚Ä¢ Requirements Phase: {metadata.get('req_phase', 'TBD')}
‚Ä¢ Design Phase: {metadata.get('design_phase', 'TBD')}
‚Ä¢ Development Phase: {metadata.get('dev_phase', 'TBD')}
‚Ä¢ Testing Phase: {metadata.get('test_phase', 'TBD')}
‚Ä¢ Deployment: {metadata.get('deployment', 'TBD')}
"""

        doc += f"""

9. BUDGET & RESOURCES
{'='*80}

"""

        # Section 9: budget sentences, or budget/resource fields from metadata.
        if structured_info['budget']:
            for budget_item in structured_info['budget']:
                doc += f"‚Ä¢ {budget_item}\n"
        else:
            doc += f"""
Estimated Budget: {metadata.get('budget', 'To be determined')}

Resource Requirements:
‚Ä¢ Team Size: {metadata.get('team_size', 'TBD')}
‚Ä¢ Duration: {metadata.get('duration', 'TBD')}
‚Ä¢ External Resources: {metadata.get('external_resources', 'TBD')}
"""

        doc += f"""

10. RISKS & ASSUMPTIONS
{'='*80}

Risks Identified:
"""

        # Section 10: every risk gets the same 'risk_impact' value and a
        # placeholder mitigation line.
        if structured_info['risks']:
            for idx, risk in enumerate(structured_info['risks'], 1):
                doc += f"{idx}. {risk}\n"
                doc += f"   Impact: {metadata.get('risk_impact', 'Medium')}\n"
                doc += f"   Mitigation: To be defined\n\n"
        else:
            doc += "Risk assessment to be conducted during project planning.\n"

        # The assumptions are fixed boilerplate (plain string, no placeholders).
        doc += """

Assumptions:
‚Ä¢ Resources will be available as per project timeline
‚Ä¢ Stakeholder approvals will be obtained in timely manner
‚Ä¢ Technical infrastructure is available and ready

"""

        # Sections 11-13 and the footer are static apart from the rules and
        # the generation timestamp.
        doc += f"""

11. DEPENDENCIES
{'='*80}

‚Ä¢ Dependencies identified in audio discussion
‚Ä¢ External systems and integrations as required
‚Ä¢ Third-party services and vendors as needed


12. SUCCESS CRITERIA
{'='*80}

The project will be considered successful when:

‚Ä¢ All business requirements are met
‚Ä¢ System is deployed and operational
‚Ä¢ User acceptance testing is completed successfully
‚Ä¢ Stakeholders sign off on deliverables


13. APPROVAL
{'='*80}

This document has been reviewed and approved by:


Business Owner: _____________________    Date: ___________

Signature:      _____________________


Project Sponsor: ____________________    Date: ___________

Signature:       ____________________


{'='*80}
Document Generated from Audio Analysis using Whisper Large + FLAN-T5 Large
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
{'='*80}
"""

        return doc
    
    def generate_purchase_order(self, summary_text, structured_info, metadata):
        """Generate Purchase Order"""
        
        doc = f"""
{'='*80}
PURCHASE ORDER
{'='*80}

PO Number:        {metadata.get('po_number', 'PO-' + datetime.now().strftime('%Y%m%d-%H%M'))}
Date:             {metadata.get('date', datetime.now().strftime('%Y-%m-%d'))}
Status:           {metadata.get('status', 'Draft - Extracted from Audio')}


VENDOR INFORMATION:
{'='*80}
Vendor Name:      {metadata.get('vendor_name', 'TBD - As per audio discussion')}
Vendor Code:      {metadata.get('vendor_code', 'TBD')}
Address:          {metadata.get('vendor_address', 'TBD')}
City/State/ZIP:   {metadata.get('vendor_location', 'TBD')}
Contact Person:   {metadata.get('vendor_contact', 'TBD')}
Phone:            {metadata.get('vendor_phone', 'TBD')}
Email:            {metadata.get('vendor_email', 'TBD')}
GST/Tax ID:       {metadata.get('vendor_gst', 'TBD')}


BUYER INFORMATION:
{'='*80}
Company Name:     {metadata.get('company_name', 'Your Company Ltd.')}
Department:       {metadata.get('department', 'Procurement')}
Address:          {metadata.get('buyer_address', 'TBD')}
City/State/ZIP:   {metadata.get('buyer_location', 'TBD')}
Contact Person:   {metadata.get('buyer_contact', metadata.get('author', 'TBD'))}
Phone:            {metadata.get('buyer_phone', 'TBD')}
Email:            {metadata.get('buyer_email', 'TBD')}


PURCHASE ORDER SUMMARY:
{'='*80}

Based on Audio Discussion:
{summary_text}


DETAILED LINE ITEMS:
{'='*80}

"""
        
        # Extract items from deliverables or requirements
        items = structured_info['deliverables'] if structured_info['deliverables'] else structured_info['requirements']
        
        doc += f"{'Item':<5} {'Description':<45} {'Qty':<8} {'Unit':<10} {'Price':<12} {'Total':<12}\n"
        doc += "-" * 100 + "\n"
        
        if items:
            for idx, item in enumerate(items[:15], 1):  # Max 15 items
                clean_item = item.replace('\n', ' ')[:42]
                doc += f"{idx:<5} {clean_item:<45} {'TBD':<8} {'Each':<10} {'TBD':<12} {'TBD':<12}\n"
        else:
            doc += f"{'1':<5} {'Items/Services as per audio discussion':<45} {'TBD':<8} {'Each':<10} {'TBD':<12} {'TBD':<12}\n"
        
        doc += "\n"
        
        doc += f"""

COST BREAKDOWN:
{'='*80}

"""
        
        if structured_info['budget']:
            doc += "Cost Details (from audio discussion):\n\n"
            for budget_item in structured_info['budget']:
                doc += f"‚Ä¢ {budget_item}\n"
            doc += "\n"
        
        doc += f"""
Subtotal:                                                    {metadata.get('subtotal', 'TBD')}
Discount (if any):                                           {metadata.get('discount', '0.00')}
                                                             ___________
Subtotal after Discount:                                     {metadata.get('subtotal_after_discount', 'TBD')}

Tax/GST ({metadata.get('tax_rate', '18')}%):                                             {metadata.get('tax_amount', 'TBD')}
Shipping & Handling:                                         {metadata.get('shipping', 'TBD')}
Other Charges:                                               {metadata.get('other_charges', '0.00')}
                                                             ___________
TOTAL AMOUNT:                                                {metadata.get('total_amount', 'TBD')}
                                                             ===========


TERMS & CONDITIONS:
{'='*80}

Payment Terms:         {metadata.get('payment_terms', 'Net 30 Days')}
Delivery Terms:        {metadata.get('delivery_terms', 'FOB Destination')}
Expected Delivery:     {metadata.get('delivery_date', 'TBD - As per discussion')}
Delivery Address:      {metadata.get('delivery_address', 'As per buyer information above')}
Shipping Method:       {metadata.get('shipping_method', 'Standard')}
Warranty:              {metadata.get('warranty', 'As per vendor terms')}
Return Policy:         {metadata.get('return_policy', 'As per vendor terms')}


PAYMENT SCHEDULE:
{'='*80}

"""
        
        if metadata.get('payment_schedule'):
            doc += metadata['payment_schedule']
        else:
            doc += f"""
‚Ä¢ Advance Payment: {metadata.get('advance_payment', '0%')} on PO confirmation
‚Ä¢ Balance Payment: {metadata.get('balance_payment', '100%')} {metadata.get('payment_terms', 'Net 30')}
"""
        
        doc += f"""

SPECIAL INSTRUCTIONS:
{'='*80}

"""
        
        if structured_info['requirements']:
            doc += "Requirements from audio discussion:\n\n"
            for req in structured_info['requirements'][:5]:
                doc += f"‚Ä¢ {req}\n"
        else:
            doc += "As per audio discussion and mutual agreement.\n"
        
        doc += f"""

ADDITIONAL NOTES:
{'='*80}

"""
        
        if structured_info['action_items']:
            doc += "Action Items:\n\n"
            for action in structured_info['action_items'][:5]:
                doc += f"‚Ä¢ {action}\n"
        
        doc += f"""

VALIDITY:
{'='*80}

This Purchase Order is valid until: {metadata.get('validity_date', 'TBD')}


APPROVAL & AUTHORIZATION:
{'='*80}

Requested By:

Name:      {metadata.get('requested_by', 'TBD')}
Title:     {metadata.get('requested_title', 'TBD')}
Date:      {metadata.get('date', 'TBD')}
Signature: _____________________


Approved By:

Name:      {metadata.get('approved_by', 'TBD')}
Title:     {metadata.get('approved_title', 'Manager/Director')}
Date:      ___________
Signature: _____________________


Finance Approval:

Name:      {metadata.get('finance_approval', 'TBD')}
Title:     Finance Manager
Date:      ___________
Signature: _____________________


VENDOR ACCEPTANCE:
{'='*80}

We accept the terms and conditions of this Purchase Order:

Vendor Name:    {metadata.get('vendor_name', 'TBD')}
Authorized By:  _____________________
Title:          _____________________
Date:           ___________
Signature:      _____________________
Company Seal:   


{'='*80}
Purchase Order Generated from Audio Analysis
System: Whisper Large + FLAN-T5 Large
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
{'='*80}

IMPORTANT NOTES:
- This is a preliminary document extracted from audio discussion
- Please review and verify all details before finalization
- TBD items must be filled in before final approval
- Consult legal/procurement team for compliance review
"""
        
        return doc
    
    def process_audio_to_document(
        self,
        audio_path,
        document_type='brd',
        custom_instruction=None,
        metadata=None,
        output_dir="/kaggle/working"
    ):
        """
        Complete pipeline: Audio -> Summary -> Document

        Args:
            audio_path: Path to audio file
            document_type: 'brd' or 'purchase_order'
            custom_instruction: Custom instruction for T5
            metadata: Document metadata. It is copied, never modified;
                'project_name' defaults to the audio file name without its
                extension and 'date' defaults to today.
            output_dir: Directory the document is written to (created when
                missing). Defaults to the Kaggle working directory.

        Returns:
            dict with keys 'transcription', 'summary', 'structured_info',
            'formatted_document' and 'output_file'.

        Raises:
            ValueError: If document_type is not supported. Raised before any
                transcription or summarization work is started.
        """

        # Fail fast: don't spend time transcribing/summarizing only to
        # reject the request at the very end.
        if document_type not in ('brd', 'purchase_order'):
            raise ValueError(f"Unknown document type: {document_type}")

        print("="*70)
        print(f"AUDIO TO {document_type.upper()} CONVERTER")
        print("="*70 + "\n")

        # Step 1: Transcribe
        print("STEP 1: Transcribing with Whisper Large...")
        transcription = self.transcribe_audio(audio_path)

        # Step 2: Generate Summary
        print("STEP 2: Generating summary with FLAN-T5 Large...")
        summary = self.generate_summary(
            transcription['text'],
            custom_instruction=custom_instruction
        )

        # Step 3: Extract structured information
        print("STEP 3: Extracting structured information...")
        structured_info = self.extract_structured_info(summary)

        # Step 4: Generate document
        print(f"STEP 4: Generating {document_type.upper()}...\n")

        # Work on a copy so the defaults below never leak into the caller's
        # dict (e.g. a reusable batch specification).
        metadata = dict(metadata) if metadata else {}

        # splitext keeps dotted names intact: "team.sync.v2.mp3" -> "team.sync.v2"
        default_name = os.path.splitext(os.path.basename(audio_path))[0]
        metadata.setdefault('project_name', default_name)
        metadata.setdefault('date', datetime.now().strftime('%Y-%m-%d'))

        if document_type == 'brd':
            formatted_doc = self.generate_brd(summary, structured_info, metadata)
        else:  # 'purchase_order' (validated above)
            formatted_doc = self.generate_purchase_order(summary, structured_info, metadata)

        # Step 5: Save
        # Path separators in the project name would redirect the file into
        # another (possibly non-existent) directory; neutralise them for the
        # file name only - the document itself keeps the original name.
        safe_name = re.sub(r'[\\/]+', '_', str(metadata['project_name']))
        os.makedirs(output_dir, exist_ok=True)
        output_filename = os.path.join(output_dir, f"{document_type}_{safe_name}.txt")
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(formatted_doc)

        # Glyphs are written as escapes so a wrong source-file encoding
        # cannot turn them into mojibake again.
        print(f"\u2705 {document_type.upper()} generated and saved!")
        print(f"\U0001F4C1 File: {output_filename}\n")

        return {
            'transcription': transcription['text'],
            'summary': summary,
            'structured_info': structured_info,
            'formatted_document': formatted_doc,
            'output_file': output_filename
        }


# ============================================================================
# USAGE EXAMPLES
# ============================================================================

if __name__ == "__main__":
    # Demo driver: loads the models once, then produces a BRD, a purchase
    # order, and finally a small batch of documents from audio files.

    def _preview(text, limit=300):
        """Return at most `limit` characters, adding '...' only when text was cut."""
        return text if len(text) <= limit else text[:limit] + "..."

    # ========================================================================
    # STEP 1: Load Models ONCE
    # ========================================================================

    print("="*70)
    print("INITIALIZING T5 LARGE DOCUMENT GENERATOR")
    print("="*70 + "\n")

    generator = SmartT5LargeDocumentGenerator(
        whisper_model="large",
        t5_model="google/flan-t5-large"
    )


    # ========================================================================
    # EXAMPLE 1: Generate BRD from Audio
    # ========================================================================

    print("\n" + "="*70)
    print("EXAMPLE 1: GENERATE BRD FROM AUDIO")
    print("="*70 + "\n")

    brd_results = generator.process_audio_to_document(
        audio_path="/kaggle/input/audio/requirements_meeting.mp3",
        document_type='brd',
        custom_instruction="Extract all business requirements, decisions, timeline, and stakeholder information",
        metadata={
            'project_name': 'Mobile_App_Redesign',
            'version': '1.0',
            'status': 'Draft',
            'author': 'Business Analysis Team',
            'department': 'Product Development',
            'sponsor': 'VP of Product',
            'priority': 'High'
        }
    )

    print("BRD Summary Preview:")
    print(_preview(brd_results['summary']) + "\n")


    # ========================================================================
    # EXAMPLE 2: Generate Purchase Order from Audio
    # ========================================================================

    print("\n" + "="*70)
    print("EXAMPLE 2: GENERATE PURCHASE ORDER FROM AUDIO")
    print("="*70 + "\n")

    po_results = generator.process_audio_to_document(
        audio_path="/kaggle/input/audio/vendor_discussion.mp3",
        document_type='purchase_order',
        custom_instruction="Extract vendor details, items to be purchased, quantities, costs, and delivery terms",
        metadata={
            'po_number': 'PO-2024-001',
            'vendor_name': 'ABC Technology Solutions Pvt Ltd',
            'vendor_address': '123 Tech Park, Bangalore',
            'vendor_contact': 'Mr. Rajesh Kumar',
            'vendor_phone': '+91 98765 43210',
            'vendor_email': 'rajesh@abctech.com',
            'vendor_gst': '29ABCDE1234F1Z5',
            'company_name': 'XYZ Enterprises Ltd',
            'department': 'IT Procurement',
            'payment_terms': 'Net 30 Days',
            'delivery_date': '2024-02-15',
            'shipping_method': 'Express Delivery',
            'tax_rate': '18'
        }
    )

    print("PO Summary Preview:")
    print(_preview(po_results['summary']) + "\n")


    # ========================================================================
    # EXAMPLE 3: Process Multiple Audio Files
    # ========================================================================

    print("\n" + "="*70)
    print("EXAMPLE 3: BATCH PROCESSING")
    print("="*70 + "\n")

    audio_documents = [
        {
            'path': '/kaggle/input/audio/meeting1.mp3',
            'type': 'brd',
            'metadata': {'project_name': 'Project_Alpha', 'version': '1.0'}
        },
        {
            'path': '/kaggle/input/audio/vendor_call.mp3',
            'type': 'purchase_order',
            'metadata': {'vendor_name': 'Vendor XYZ', 'po_number': 'PO-2024-002'}
        }
    ]

    all_results = []

    for item in audio_documents:
        print(f"\nProcessing: {item['path']}")

        # Deliberate best-effort: one bad file must not abort the batch, so
        # the failure is reported and the loop moves on to the next item.
        try:
            results = generator.process_audio_to_document(
                audio_path=item['path'],
                document_type=item['type'],
                # Pass a copy so the batch specification above stays
                # untouched and can be re-run as-is.
                metadata=dict(item['metadata'])
            )
            all_results.append(results)
            print(f"\u2705 Generated {item['type'].upper()}\n")
        except Exception as e:
            print(f"\u274c Error: {e}\n")

    print(f"\u2705 Batch processing complete! Generated {len(all_results)} documents.")


# ============================================================================
# COMPLETE WORKFLOW EXAMPLE
# ============================================================================

"""
COMPLETE WORKFLOW:

# Cell 1: Installation
!pip install -q openai-whisper transformers sentencepiece accelerate

# Cell 2: Paste entire code above

# Cell 3: Load models ONCE
generator = SmartT5LargeDocumentGenerator(
    whisper_model="large",
    t5_model="google/flan-t5-large"
)

# Cell 4: Generate BRD
brd = generator.process_audio_to