In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy import signal
from datetime import timedelta, datetime
import librosa
import librosa.display
import soundfile as sf
from sklearn.preprocessing import normalize
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, Text, Boolean, LargeBinary
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import sqlite3
from io import BytesIO
import base64
import requests
import tempfile
import urllib.parse
from pathlib import Path

# Database setup
Base = declarative_base()

class AdDetectionResult(Base):
    __tablename__ = 'ad_detection_results'
    
    id = Column(Integer, primary_key=True, autoincrement=True)
    brand = Column(String(100), nullable=False)
    description = Column(Text, nullable=False)
    start_time_seconds = Column(Float, nullable=False)
    end_time_seconds = Column(Float, nullable=False)
    duration_seconds = Column(Float, nullable=False)
    correlation_score = Column(Float, nullable=True)
    raw_correlation = Column(Float, nullable=True)
    mfcc_correlation = Column(Float, nullable=True)
    overlap_duration = Column(Float, nullable=True)
    radio_recording_file = Column(String(255), nullable=False)
    master_file_name = Column(String(255), nullable=True)
    detection_timestamp = Column(DateTime, default=datetime.now)
    processing_status = Column(String(50), default='completed')
    total_matches_found = Column(Integer, default=0)
    broadcast_url = Column(String(500), nullable=True)  # Store original URL

class ExcelReports(Base):
    __tablename__ = 'excel_reports'
    
    id = Column(Integer, primary_key=True, autoincrement=True)
    radio_recording_file = Column(String(255), nullable=False, unique=True)
    excel_data = Column(LargeBinary, nullable=False)  # Store Excel file as binary
    excel_filename = Column(String(255), nullable=False)
    created_timestamp = Column(DateTime, default=datetime.now)
    total_ads_detected = Column(Integer, default=0)
    file_size_bytes = Column(Integer, default=0)
    broadcast_url = Column(String(500), nullable=True)  # Store original URL

# Utility functions
def seconds_to_standard_time(seconds):
    return str(timedelta(seconds=seconds)).split('.')[0]

def extract_brand_name(filename):
    """Extract brand name from filename - everything before the first underscore"""
    if '_' in filename:
        return filename.split('_')[0]
    else:
        return os.path.splitext(filename)[0]

def download_audio_from_url(url, download_folder="temp_downloads"):
    """
    Download audio file from URL and return local file path
    """
    try:
        # Create download folder if it doesn't exist
        os.makedirs(download_folder, exist_ok=True)
        
        print(f" Downloading audio from URL...")
        print(f"   URL: {url}")
        
        # Get filename from URL or generate one
        parsed_url = urllib.parse.urlparse(url)
        filename = os.path.basename(parsed_url.path)
        
        # If no filename in URL, generate one based on timestamp
        if not filename or not any(filename.endswith(ext) for ext in ['.mp3', '.wav', '.m4a', '.flac']):
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            # Try to detect format from Content-Type header
            try:
                response = requests.head(url, timeout=10)
                content_type = response.headers.get('content-type', '').lower()
                if 'mp3' in content_type:
                    filename = f"broadcast_{timestamp}.mp3"
                elif 'wav' in content_type:
                    filename = f"broadcast_{timestamp}.wav"
                elif 'mp4' in content_type or 'm4a' in content_type:
                    filename = f"broadcast_{timestamp}.m4a"
                else:
                    filename = f"broadcast_{timestamp}.mp3"  # Default to mp3
            except:
                filename = f"broadcast_{timestamp}.mp3"
        
        local_path = os.path.join(download_folder, filename)
        
        # Download the file
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()
        
        total_size = int(response.headers.get('content-length', 0))
        downloaded_size = 0
        
        with open(local_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    downloaded_size += len(chunk)
                    if total_size > 0:
                        progress = (downloaded_size / total_size) * 100
                        print(f"\r   Progress: {progress:.1f}% ({downloaded_size / (1024*1024):.1f} MB)", end='', flush=True)
        
        print(f"\n✓ Downloaded successfully: {filename}")
        print(f"   Size: {os.path.getsize(local_path) / (1024*1024):.1f} MB")
        print(f"   Saved to: {local_path}")
        
        return local_path, filename
        
    except requests.exceptions.RequestException as e:
        print(f"✗ Error downloading from URL: {e}")
        return None, None
    except Exception as e:
        print(f"✗ Unexpected error during download: {e}")
        return None, None

def load_audio(file_path):
    try:
        audio, sr = librosa.load(file_path, sr=22050, mono=True)
        return audio, sr
    except Exception as e:
        print(f"Error loading {file_path} with librosa: {e}")
        try:
            sr, audio = wavfile.read(file_path)
            if len(audio.shape) > 1:
                audio = np.mean(audio, axis=1)
            if audio.dtype != np.float32 and audio.dtype != np.float64:
                audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
            if sr != 22050:
                audio = librosa.resample(audio, orig_sr=sr, target_sr=22050)
                sr = 22050
            return audio, sr
        except Exception as e2:
            print(f"Error loading {file_path} with scipy: {e2}")
            return None, None

def preprocess_audio(audio, sr):
    """Preprocess audio for better matching"""
    audio = audio / (np.max(np.abs(audio)) + 1e-8)
    
    pre_emphasis = 0.97
    audio = np.append(audio[0], audio[1:] - pre_emphasis * audio[:-1])
    
    nyquist = sr / 2
    low = 300 / nyquist
    high = 3400 / nyquist
    
    if low < 1.0 and high < 1.0:
        b, a = signal.butter(4, [low, high], btype='band')
        audio = signal.filtfilt(b, a, audio)
    
    return audio

def extract_mfcc_features(audio, sr, n_mfcc=13):
    """Extract MFCC features for better audio matching"""
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, n_fft=2048, hop_length=512)
    mfcc_delta = librosa.feature.delta(mfccs)
    mfcc_delta2 = librosa.feature.delta(mfccs, order=2)
    
    features = np.vstack([mfccs, mfcc_delta, mfcc_delta2])
    return features

def compute_feature_correlation(master_features, recording_features):
    """Compute correlation between feature vectors"""
    master_norm = normalize(master_features, axis=0)
    recording_norm = normalize(recording_features, axis=0)
    
    correlations = []
    for i in range(master_norm.shape[0]):
        corr = signal.correlate(recording_norm[i], master_norm[i], mode='full')
        correlations.append(corr)
    
    avg_correlation = np.mean(correlations, axis=0)
    return avg_correlation

def normalize_signal(signal):
    return (signal - np.mean(signal)) / (np.std(signal) + 1e-10)

def find_matches_improved(master_audio, master_sr, radio_audio, radio_sr, threshold=0.65):
    """
    Improved ad detection using normalized cross-correlation.
    Works with .mp3 or .wav master files.
    """

    # Step 1: Resample ad audio to match radio sampling rate
    if master_sr != radio_sr:
        master_audio = librosa.resample(master_audio, orig_sr=master_sr, target_sr=radio_sr)
        master_sr = radio_sr

    # Step 2: Normalize both signals
    master_audio = normalize_signal(master_audio)
    radio_audio = normalize_signal(radio_audio)

    # Step 3: Cross-correlation
    correlation = correlate(radio_audio, master_audio, mode='valid')
    correlation /= len(master_audio)

    matches = []
    ad_duration = len(master_audio) / radio_sr

    i = 0
    while i < len(correlation):
        if correlation[i] >= threshold:
            start_time = i / radio_sr
            end_time = start_time + ad_duration
            matches.append({
                'start_time': start_time,
                'end_time': end_time,
                'duration': ad_duration,
                'correlation': float(round(correlation[i], 4))
            })
            # Skip forward by ad duration to avoid overlap
            i += int(ad_duration * radio_sr)
        else:
            i += 1

    return matches

# Enhanced Database Manager Class
class EnhancedRadioRecordingManager:
    def __init__(self, db_path="radio_ad_detection.db"):
        self.db_path = db_path
        self.engine = create_engine(f'sqlite:///{db_path}')
        Base.metadata.create_all(self.engine)
        Session = sessionmaker(bind=self.engine)
        self.Session = Session
    
    def save_detection_results(self, matches_dict, radio_filename, broadcast_url=None):
        """Save detection results to database for a specific radio recording"""
        session = self.Session()
        
        try:
            # Clear existing results for this radio file (for reprocessing)
            session.query(AdDetectionResult).filter(
                AdDetectionResult.radio_recording_file == radio_filename
            ).delete()
            
            # Process and filter matches
            all_matches = []
            for master_name, matches in matches_dict.items():
                for match in matches:
                    all_matches.append({
                        'master_name': master_name,
                        'start_time': match['start_time'],
                        'end_time': match['end_time'],
                        'duration': match['duration'],
                        'correlation': match['correlation'],
                        'overlap_duration': match.get('overlap_duration', match['duration']),
                        'raw_correlation': match.get('raw_correlation', match['correlation']),
                        'mfcc_correlation': match.get('mfcc_correlation', match['correlation'])
                    })
            
            # Sort and filter overlapping matches
            all_matches.sort(key=lambda x: x['start_time'])
            final_matches = self._filter_overlapping_matches(all_matches)
            
            # Save to database
            for match in final_matches:
                brand_name = extract_brand_name(match['master_name'])
                description = os.path.splitext(match['master_name'])[0]
                
                db_record = AdDetectionResult(
                    brand=brand_name,
                    description=description,
                    start_time_seconds=match['start_time'],
                    end_time_seconds=match['end_time'],
                    duration_seconds=match['duration'],
                    correlation_score=match['correlation'],
                    raw_correlation=match['raw_correlation'],
                    mfcc_correlation=match['mfcc_correlation'],
                    overlap_duration=match['overlap_duration'],
                    radio_recording_file=radio_filename,
                    master_file_name=match['master_name'],
                    total_matches_found=len(final_matches),
                    broadcast_url=broadcast_url
                )
                session.add(db_record)
            
            session.commit()
            print(f" Saved {len(final_matches)} detection results for {radio_filename}")
            
            # Generate and store Excel report
            self._generate_and_store_excel(radio_filename, final_matches, broadcast_url)
            
            return len(final_matches)
            
        except Exception as e:
            session.rollback()
            print(f" Error saving results: {e}")
            return 0
        finally:
            session.close()
    
    def _filter_overlapping_matches(self, all_matches):
        """Apply filtering logic for overlapping matches"""
        final_matches = []
        for match in all_matches:
            should_keep = True
            
            for i, existing_match in enumerate(final_matches):
                overlap_start = max(match['start_time'], existing_match['start_time'])
                overlap_end = min(match['end_time'], existing_match['end_time'])
                overlap_duration = max(0, overlap_end - overlap_start)
                
                min_duration = min(match['duration'], existing_match['duration'])
                
                if overlap_duration > (0.4 * min_duration):
                    current_score = match['correlation']
                    existing_score = existing_match['correlation']
                    
                    if current_score > existing_score:
                        final_matches[i] = match
                        should_keep = False
                        break
                    else:
                        should_keep = False
                        break
            
            if should_keep:
                final_matches.append(match)
        
        return final_matches
    
    def _generate_and_store_excel(self, radio_filename, final_matches, broadcast_url=None):
        """Generate Excel and store in database - WITHOUT correlation score"""
        session = self.Session()
        try:
            # Create Excel in memory
            output = BytesIO()
            
            # Prepare data - REMOVED correlation score column
            data = []
            header_data = {
                'Brand': 'Brand',
                'Description': 'Description',
                'Start Time (HH:MM:SS)': 'Start Time (HH:MM:SS)',
                'End Time (HH:MM:SS)': 'End Time (HH:MM:SS)',
                'Ad Duration (HH:MM:SS)': 'Ad Duration (HH:MM:SS)'
            }
            data.append(header_data)
            
            for match in final_matches:
                start_rounded = max(0, round(match['start_time']))
                end_rounded = round(match['end_time'])
                duration_rounded = end_rounded - start_rounded
                
                data.append({
                    'Brand': extract_brand_name(match['master_name']),
                    'Description': os.path.splitext(match['master_name'])[0],
                    'Start Time (HH:MM:SS)': seconds_to_standard_time(start_rounded),
                    'End Time (HH:MM:SS)': seconds_to_standard_time(end_rounded),
                    'Ad Duration (HH:MM:SS)': seconds_to_standard_time(abs(duration_rounded))
                })
            
            df = pd.DataFrame(data)
            
            with pd.ExcelWriter(output, engine='openpyxl') as writer:
                df.to_excel(writer, sheet_name='Ad Detection Results', index=False, header=False)
                
                workbook = writer.book
                worksheet = writer.sheets['Ad Detection Results']
                
                from openpyxl.styles import PatternFill, Font, Alignment
                
                yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
                bold_font = Font(bold=True)
                center_alignment = Alignment(horizontal="center")
                
                # Format header row
                for col in range(1, len(df.columns) + 1):
                    cell = worksheet.cell(row=1, column=col)
                    cell.fill = yellow_fill
                    cell.font = bold_font
                    cell.alignment = center_alignment
                
                # Adjust column widths - REMOVED correlation score column width
                worksheet.column_dimensions['A'].width = 20
                worksheet.column_dimensions['B'].width = 60
                worksheet.column_dimensions['C'].width = 18
                worksheet.column_dimensions['D'].width = 18
                worksheet.column_dimensions['E'].width = 20
            
            excel_data = output.getvalue()
            
            # Store Excel in database
            excel_filename = f"detection_results_{radio_filename.replace('.mp3', '').replace('.wav', '')}.xlsx"
            
            # Remove existing Excel report for this radio file
            session.query(ExcelReports).filter(
                ExcelReports.radio_recording_file == radio_filename
            ).delete()
            
            excel_record = ExcelReports(
                radio_recording_file=radio_filename,
                excel_data=excel_data,
                excel_filename=excel_filename,
                total_ads_detected=len(final_matches),
                file_size_bytes=len(excel_data),
                broadcast_url=broadcast_url
            )
            session.add(excel_record)
            session.commit()
            
            print(f"✓ Excel report stored in database for {radio_filename}")
            
        except Exception as e:
            session.rollback()
            print(f"✗ Error storing Excel: {e}")
        finally:
            session.close()
    
    def get_radio_recordings_list(self):
        """Get list of all processed radio recordings"""
        session = self.Session()
        try:
            recordings = session.query(AdDetectionResult.radio_recording_file).distinct().all()
            return [r[0] for r in recordings]
        except Exception as e:
            print(f"Error getting recordings list: {e}")
            return []
        finally:
            session.close()
    
    def download_excel_by_filename(self, radio_filename, save_path=None):
        """Download Excel file from database by radio filename"""
        session = self.Session()
        try:
            excel_record = session.query(ExcelReports).filter(
                ExcelReports.radio_recording_file == radio_filename
            ).first()
            
            if not excel_record:
                print(f"No Excel report found for: {radio_filename}")
                return None
            
            if save_path is None:
                save_path = excel_record.excel_filename
            
            with open(save_path, 'wb') as f:
                f.write(excel_record.excel_data)
            
            print(f"✓ Excel downloaded: {save_path}")
            print(f"  - Total ads detected: {excel_record.total_ads_detected}")
            print(f"  - File size: {excel_record.file_size_bytes} bytes")
            print(f"  - Created: {excel_record.created_timestamp}")
            if excel_record.broadcast_url:
                print(f"  - Original URL: {excel_record.broadcast_url}")
            
            return save_path
            
        except Exception as e:
            print(f" Error downloading Excel: {e}")
            return None
        finally:
            session.close()
    
    def get_all_available_reports(self):
        """Get information about all available Excel reports"""
        session = self.Session()
        try:
            reports = session.query(ExcelReports).order_by(ExcelReports.created_timestamp.desc()).all()
            
            if not reports:
                print("No reports available in database.")
                return []
            
            print(f"\n Available Reports ({len(reports)}):")
            print("=" * 80)
            
            report_info = []
            for i, report in enumerate(reports, 1):
                info = {
                    'id': report.id,
                    'radio_file': report.radio_recording_file,
                    'excel_filename': report.excel_filename,
                    'ads_detected': report.total_ads_detected,
                    'created': report.created_timestamp.strftime('%Y-%m-%d %H:%M:%S'),
                    'size_kb': round(report.file_size_bytes / 1024, 2),
                    'broadcast_url': report.broadcast_url
                }
                report_info.append(info)
                
                print(f"{i:2d}. {report.radio_recording_file}")
                print(f"     Excel: {report.excel_filename}")
                print(f"     Ads: {report.total_ads_detected} |  Created: {info['created']} |  Size: {info['size_kb']} KB")
                if report.broadcast_url:
                    print(f"     URL: {report.broadcast_url[:80]}...")
                print()
            
            return report_info
            
        except Exception as e:
            print(f"Error getting reports: {e}")
            return []
        finally:
            session.close()
    
    def get_detection_summary(self, radio_filename):
        """Get detailed summary of detection results"""
        session = self.Session()
        try:
            results = session.query(AdDetectionResult).filter(
                AdDetectionResult.radio_recording_file == radio_filename
            ).order_by(AdDetectionResult.start_time_seconds.asc()).all()
            
            if not results:
                print(f"No results found for: {radio_filename}")
                return None
            
            print(f"\n Detection Summary for: {radio_filename}")
            print("=" * 60)
            print(f"Total Ads Detected: {len(results)}")
            print(f"Processing Date: {results[0].detection_timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
            if results[0].broadcast_url:
                print(f"Original URL: {results[0].broadcast_url}")
            print()
            
            # Brand summary
            brand_counts = {}
            total_duration = 0
            
            for result in results:
                brand_counts[result.brand] = brand_counts.get(result.brand, 0) + 1
                total_duration += result.duration_seconds
            
            print(" Brand Breakdown:")
            for brand, count in sorted(brand_counts.items(), key=lambda x: x[1], reverse=True):
                print(f"  {brand}: {count} ads")
            
            print(f"\n⏱️  Total Ad Duration: {seconds_to_standard_time(total_duration)}")
            print(f" Average Correlation Score: {np.mean([r.correlation_score for r in results]):.4f}")
            
            return {
                'total_ads': len(results),
                'brands': brand_counts,
                'total_duration': total_duration,
                'avg_correlation': np.mean([r.correlation_score for r in results])
            }
            
        except Exception as e:
            print(f"Error getting summary: {e}")
            return None
        finally:
            session.close()

# Main processing function for URL-based audio
def process_broadcast_from_url(broadcast_url, ad_masters_folder="ad_masters", 
                              correlation_threshold=0.65, cleanup_temp=True, db_manager=None):
    """
    Process a radio broadcast from URL and save everything to database
    
    Args:
        broadcast_url (str): URL to the audio broadcast file
        ad_masters_folder (str): Path to folder containing ad master files
        correlation_threshold (float): Correlation threshold for ad detection
        cleanup_temp (bool): Whether to delete downloaded file after processing
        db_manager: Database manager instance (optional)
    
    Returns:
        dict: Processing results with filename, matches count, and file paths
    """
    
    if db_manager is None:
        db_manager = EnhancedRadioRecordingManager()
    
    print(f"\n Processing Radio Broadcast from URL")
    print("=" * 60)
    print(f"URL: {broadcast_url}")
    print()
    
    # Step 1: Download audio from URL
    downloaded_path, radio_filename = download_audio_from_url(broadcast_url)
    if not downloaded_path:
        return {
            'success': False,
            'error': 'Failed to download audio from URL',
            'broadcast_url': broadcast_url
        }
    
    try:
        # Step 2: Load advertisement masters
        print("\n Loading advertisement masters...")
        masters = {}
        for filename in os.listdir(ad_masters_folder):
            if filename.endswith(('.wav', '.mp3')):
                filepath = os.path.join(ad_masters_folder, filename)
                audio, sr = load_audio(filepath)
                if audio is not None:
                    masters[filename] = {
                        'audio': audio,
                        'sr': sr,
                        'duration': len(audio) / sr
                    }
        
        print(f"✓ Loaded {len(masters)} advertisement masters")
        
        # Step 3: Load radio recording
        print("\n Loading downloaded radio broadcast...")
        radio_recording, radio_sr = load_audio(downloaded_path)
        if radio_recording is None:
            return {
                'success': False,
                'error': 'Could not load downloaded audio file',
                'broadcast_url': broadcast_url,
                'downloaded_file': downloaded_path
            }
        
        radio_duration = len(radio_recording) / radio_sr
        print(f"✓ Loaded radio recording")
        print(f"   Duration: {seconds_to_standard_time(radio_duration)}")
        print(f"   Sample rate: {radio_sr} Hz")
        print(f"   File size: {os.path.getsize(downloaded_path) / (1024*1024):.1f} MB")
        
        # Step 4: Find matches
        print("\n Finding advertisement matches...")
        all_matches = {}
        total_matches = 0
        
        for master_name, master_data in masters.items():
            print(f"   Analyzing: {master_name}...", end='', flush=True)
            matches = find_matches_improved(
                master_data['audio'], 
                master_data['sr'], 
                radio_recording, 
                radio_sr, 
                threshold=correlation_threshold
            )
            all_matches[master_name] = matches
            total_matches += len(matches)
            
            if len(matches) > 0:
                print(f" ✓ {len(matches)} matches")
            else:
                print(f" - No matches")
        
        print(f"\n Total raw matches found: {total_matches}")
        
        # Step 5: Save to database (this also generates and stores Excel)
        final_matches = db_manager.save_detection_results(all_matches, radio_filename, broadcast_url)
        
        # Step 6: Show results
        if final_matches > 0:
            print(f"\n Processing completed successfully!")
            print(f"   Final matches after filtering: {final_matches}")
            print(f"   Results saved to database")
            print(f"   Excel report generated and stored")
            
            # Show summary
            summary = db_manager.get_detection_summary(radio_filename)
            
            result = {
                'success': True,
                'broadcast_url': broadcast_url,
                'radio_filename': radio_filename,
                'downloaded_file': downloaded_path,
                'total_matches': final_matches,
                'processing_summary': summary
            }
        else:
            print(f"\n No advertisement matches found above threshold ({correlation_threshold})")
            result = {
                'success': True,
                'broadcast_url': broadcast_url,
                'radio_filename': radio_filename,
                'downloaded_file': downloaded_path,
                'total_matches': 0,
                'message': 'No matches found'
            }
        
        return result
        
    except Exception as e:
        print(f"\n Error during processing: {e}")
        return {
            'success': False,
            'error': str(e),
            'broadcast_url': broadcast_url,
            'downloaded_file': downloaded_path
        }
    
    finally:
        # Cleanup downloaded file if requested
        if cleanup_temp and downloaded_path and os.path.exists(downloaded_path):
            try:
                os.remove(downloaded_path)
                print(f"\n Cleaned up temporary file: {radio_filename}")
            except Exception as e:
                print(f"\n  Could not remove temporary file: {e}")

# Legacy function for backward compatibility
def process_single_radio_clip(ad_masters_folder, radio_recording_file_path, 
                            correlation_threshold=0.65, db_manager=None):
    """Process a single radio recording clip and save everything to database"""
    
    if db_manager is None:
        db_manager = EnhancedRadioRecordingManager()
    
    radio_filename = os.path.basename(radio_recording_file_path)
    
    print(f"\n Processing: {radio_filename}")
    print("=" * 50)
    
    # Load advertisement masters
    print(" Loading advertisement masters...")
    masters = {}
    for filename in os.listdir(ad_masters_folder):
        if filename.endswith(('.wav', '.mp3')):
            filepath = os.path.join(ad_masters_folder, filename)
            audio, sr = load_audio(filepath)
            if audio is not None:
                masters[filename] = {
                    'audio': audio,
                    'sr': sr,
                    'duration': len(audio) / sr
                }
    
    print(f"✓ Loaded {len(masters)} advertisement masters")
    
    # Load radio recording
    print(" Loading radio recording...")
    radio_recording, radio_sr = load_audio(radio_recording_file_path)
    if radio_recording is None:
        print(f"✗ Error: Could not load radio recording")
        return False
    
    radio_duration = len(radio_recording) / radio_sr
    print(f"✓ Loaded radio recording (Duration: {seconds_to_standard_time(radio_duration)})")
    
    # Find matches
    print(" Finding advertisement matches...")
    all_matches = {}
    total_matches = 0
    
    for master_name, master_data in masters.items():
        matches = find_matches_improved(
            master_data['audio'], 
            master_data['sr'], 
            radio_recording, 
            radio_sr, 
            threshold=correlation_threshold
        )
        all_matches[master_name] = matches
        total_matches += len(matches)
        if len(matches) > 0:
            print(f"  ✓ {master_name}: {len(matches)} matches")
    
    print(f" Total raw matches found: {total_matches}")
    
    # Save to database (this also generates and stores Excel)
    final_matches = db_manager.save_detection_results(all_matches, radio_filename)
    
    if final_matches > 0:
        print(f" Processing completed successfully!")
        print(f"    Final matches: {final_matches}")
        print(f"    Results saved to database")
        print(f"    Excel report generated and stored")
        
        # Show summary
        db_manager.get_detection_summary(radio_filename)
        
        return True
    else:
        print(f" No matches found above threshold")
        return False

# Convenience functions for database access
def fetch_excel_report(radio_filename, download_path=None):
    """Fetch Excel report from database by radio filename"""
    db_manager = EnhancedRadioRecordingManager()
    return db_manager.download_excel_by_filename(radio_filename, download_path)

def list_all_reports():
    """List all available reports in database"""
    db_manager = EnhancedRadioRecordingManager()
    return db_manager.get_all_available_reports()

def get_report_summary(radio_filename):
    """Get detailed summary of a specific report"""
    db_manager = EnhancedRadioRecordingManager()
    return db_manager.get_detection_summary(radio_filename)

# Legacy function for processing current audio clip (backward compatibility)
def process_current_audio_clip(ad_masters_folder="ad_masters", radio_recording_folder="radio_recording", 
                              correlation_threshold=0.65):
    """Process the single audio clip currently in radio_recording folder"""
    
    # Get the single audio file in the folder
    audio_files = []
    for filename in os.listdir(radio_recording_folder):
        if filename.endswith(('.wav', '.mp3')):
            audio_files.append(filename)
    
    if len(audio_files) == 0:
        print(" No audio files found in radio_recording folder")
        return False
    elif len(audio_files) > 1:
        print(f"  Multiple audio files found. Processing the first one: {audio_files[0]}")
    
    audio_file = audio_files[0]
    audio_path = os.path.join(radio_recording_folder, audio_file)
    
    # Process the single clip
    success = process_single_radio_clip(ad_masters_folder, audio_path, correlation_threshold)
    
    if success:
        print(f"\n Successfully processed: {audio_file}")
        print(" You can now:")
        print(f"   1. View reports: list_all_reports()")
        print(f"   2. Download Excel: fetch_excel_report('{audio_file}')")
        print(f"   3. Get summary: get_report_summary('{audio_file}')")
        return audio_file
    else:
        print(f"\n Failed to process: {audio_file}")
        return None

# Quick access functions
def download_latest_report(save_folder="downloads"):
    """Download the most recent Excel report"""
    os.makedirs(save_folder, exist_ok=True)
    reports = list_all_reports()
    if reports:
        latest = reports[0]  # Reports are sorted by creation time desc
        save_path = os.path.join(save_folder, latest['excel_filename'])
        return fetch_excel_report(latest['radio_file'], save_path)
    else:
        print(" No reports available")
        return None

def download_report_by_radio_name(radio_filename, save_folder="downloads"):
    """Download Excel report by radio filename"""
    os.makedirs(save_folder, exist_ok=True)
    save_path = os.path.join(save_folder, f"report_{radio_filename.replace('.mp3', '').replace('.wav', '')}.xlsx")
    return fetch_excel_report(radio_filename, save_path)

# Enhanced main function that accepts URL
def main(broadcast_url=None, ad_masters_folder="ad_masters", correlation_threshold=0.65, 
         cleanup_temp=True):
    """
    Main function that can process either URL or local files
    
    Args:
        broadcast_url (str, optional): URL to the broadcast audio file
        ad_masters_folder (str): Path to ad masters folder
        correlation_threshold (float): Correlation threshold for detection
        cleanup_temp (bool): Whether to cleanup downloaded files
    
    Returns:
        dict or str: Processing results or processed filename
    """
    
    print(" Radio Advertisement Detection System")
    print("=" * 60)
    
    if broadcast_url:
        # Process from URL
        print(f" Processing broadcast from URL...")
        result = process_broadcast_from_url(
            broadcast_url=broadcast_url,
            ad_masters_folder=ad_masters_folder,
            correlation_threshold=correlation_threshold,
            cleanup_temp=cleanup_temp
        )
        
        if result['success']:
            radio_filename = result['radio_filename']
            print(f"\n Available commands for this broadcast:")
            print(f"   • list_all_reports()")
            print(f"   • fetch_excel_report('{radio_filename}')")
            print(f"   • get_report_summary('{radio_filename}')")
            print(f"   • download_latest_report()")
            
            return result
        else:
            print(f"\n Processing failed: {result.get('error', 'Unknown error')}")
            return result
    
    else:
        # Legacy mode - process from radio_recording folder
        print(f" Processing from local radio_recording folder...")
        processed_file = process_current_audio_clip(
            ad_masters_folder=ad_masters_folder,
            correlation_threshold=correlation_threshold
        )
        
        if processed_file:
            print(f"\n Available commands:")
            print(f"   • list_all_reports()")
            print(f"   • fetch_excel_report('{processed_file}')")
            print(f"   • get_report_summary('{processed_file}')")
            print(f"   • download_latest_report()")
            
            return processed_file
        else:
            return None

# When run as script
if __name__ == "__main__":
    import sys
    
    if len(sys.argv) > 1:
        # URL provided as command line argument
        broadcast_url = sys.argv[1]
        print(f" Processing broadcast from command line URL:")
        result = main(broadcast_url)
    else:        
        print("1. main('YOUR_BROADCAST_URL')                    # Process from URL")
        print("2. process_broadcast_from_url('URL')             # Direct URL processing") 
        print("3. list_all_reports()                            # Show all reports")
        print("4. fetch_excel_report('filename.mp3')           # Download specific Excel")
        print("5. get_report_summary('filename.mp3')           # Show detection summary")
        print("6. download_latest_report()                     # Quick download latest")
        print("7. main()                                       # Legacy local file mode")
        print()




 Processing broadcast from command line URL:
 Radio Advertisement Detection System
 Processing broadcast from URL...

 Processing Radio Broadcast from URL
URL: -f

 Downloading audio from URL...
   URL: -f
✗ Error downloading from URL: Invalid URL '-f': No scheme supplied. Perhaps you meant https://-f?

 Processing failed: Failed to download audio from URL


  Base = declarative_base()


In [2]:
pip freeze > requirements.txt


Note: you may need to restart the kernel to use updated packages.
