In [3]:
def create_audio_synced_video(song_id=songId, plot_analysis=True, max_duration=None):
    """Create video with audio-analysis-based lyrics synchronization - WITH DURATION LIMIT"""
    # Get song data
    sql = '''
    SELECT * FROM songs
    WHERE name LIKE "imagine"
    '''
    songs = pd.read_sql(sql, engine)
    songId = songs['id'].iloc[0]
    try:
        # Get song data
        query = f"""
        SELECT s.name as song_name, s.location as audio_file,
               l.content as lyrics, a.first_name, a.last_name
        FROM songs s 
        JOIN lyrics l ON s.id = l.song_id 
        JOIN artists a ON s.artist_id = a.id 
        WHERE s.id = {song_id}
        """
        
        df = pd.read_sql(query, engine)
        song_data = df.iloc[0]
        
        print(f"🎵 Creating AUDIO-SYNCED video for: {song_data['song_name']}")
        
        # Construct file paths
        audio_dir = os.path.join(r"C:\ruby\music\public\uploads\song\location", str(song_id))
        audio_path = os.path.join(audio_dir, song_data['audio_file'])
        background_image_path = os.path.join(audio_dir, "Folder.jpg")
        
        print(f"🔊 Audio: {os.path.basename(audio_path)}")
        print(f"🖼️ Background: {os.path.basename(background_image_path)}")
        
        if not os.path.exists(audio_path):
            print("❌ Audio file not found")
            return None
        
        # Perform audio analysis
        audio_analysis = detect_vocal_segments(audio_path, plot_analysis=plot_analysis)
        
        # Load audio clip
        audio_clip = AudioFileClip(audio_path)
        full_duration = audio_analysis.get('duration', audio_clip.duration)
        
        # APPLY DURATION LIMIT
        if max_duration:
            duration = min(max_duration, full_duration)
            print(f"⏱️ Using LIMITED duration: {duration:.1f}s (max_duration={max_duration}s)")
        else:
            duration = full_duration
            print(f"⏱️ Using FULL duration: {duration:.1f}s")
        
        # Trim audio if needed
        if max_duration and full_duration > max_duration:
            audio_clip = audio_clip.subclip(0, duration)
        
        # Assign lyrics to timing segments (within the limited duration)
        lyrics_with_timing = assign_lyrics_to_segments(song_data['lyrics'], audio_analysis)
        
        if not lyrics_with_timing:
            print("❌ Could not assign lyrics timing")
            return None
        
        # Filter lyrics to only include those within the limited duration
        if max_duration:
            lyrics_with_timing = [lyric for lyric in lyrics_with_timing if lyric['start_time'] < duration]
            # Adjust the end time of the last lyric to match the limited duration
            if lyrics_with_timing and lyrics_with_timing[-1]['end_time'] > duration:
                lyrics_with_timing[-1]['end_time'] = duration
        
        print(f"📝 Successfully assigned {len(lyrics_with_timing)} lyrics lines")
        print(f"⏱️ Video duration: {duration:.1f}s ({duration/60:.1f} minutes)")
        
        # Video settings
        fps = 24
        width, height = 640, 480
        
        def make_synced_frame(t):
            try:
                # Load background
                if os.path.exists(background_image_path):
                    bg_image = Image.open(background_image_path)
                    bg_image = bg_image.resize((width, height), Image.Resampling.LANCZOS)
                    frame = np.array(bg_image)
                else:
                    frame = np.full((height, width, 3), [40, 40, 80], dtype=np.uint8)
                
                # Convert to PIL for text drawing
                pil_img = Image.fromarray(frame)
                draw = ImageDraw.Draw(pil_img)
                
                # Load font
                try:
                    font = ImageFont.truetype("arial.ttf", 32)
                except:
                    try:
                        font = ImageFont.truetype("C:/Windows/Fonts/arial.ttf", 32)
                    except:
                        font = ImageFont.load_default()
                
                # Get current lyric based on audio analysis
                current_line = get_current_lyric(t, lyrics_with_timing)
                
                if current_line:
                    # Calculate text position
                    try:
                        bbox = draw.textbbox((0, 0), current_line, font=font)
                    except AttributeError:
                        bbox = draw.textsize(current_line, font=font)
                        bbox = (0, 0, bbox[0], bbox[1])
                    
                    text_width = bbox[2] - bbox[0]
                    text_height = bbox[3] - bbox[1]
                    x = (width - text_width) // 2
                    y = (height - text_height) // 2
                    
                    # Semi-transparent background for text
                    padding = 10
                    draw.rectangle([
                        x - padding, y - padding,
                        x + text_width + padding, y + text_height + padding
                    ], fill=(0, 0, 0, 180))
                    
                    # Text with shadow for readability
                    shadow_color = (0, 0, 0)
                    text_color = (255, 255, 255)
                    
                    # Shadow
                    draw.text((x+2, y+2), current_line, font=font, fill=shadow_color)
                    # Main text
                    draw.text((x, y), current_line, font=font, fill=text_color)
                
                return np.array(pil_img)
                
            except Exception as e:
                print(f"❌ Frame error at {t:.1f}s: {e}")
                return np.zeros((height, width, 3), dtype=np.uint8)
        
        # Create video
        print("🎬 Creating audio-synced video frames...")
        video = VideoClip(make_synced_frame, duration=duration)
        video = video.set_audio(audio_clip)
        
        # Export
        output_dir = '../data/videos'
        os.makedirs(output_dir, exist_ok=True)
        
        # Include duration in filename if limited
        if max_duration:
            output_file = os.path.join(output_dir, f"{song_data['song_name']}_audio_synced_{max_duration}s.mp4")
        else:
            output_file = os.path.join(output_dir, f"{song_data['song_name']}_audio_synced_full.mp4")
        
        print("📹 Exporting audio-synced video...")
        video.write_videofile(
            output_file, 
            fps=fps, 
            codec='libx264',
            audio_codec='aac',
            verbose=False,
            logger=None
        )
        
        print(f"✅ AUDIO-SYNCED video created: {output_file}")
        print(f"📊 File size: {os.path.getsize(output_file) / (1024*1024):.1f} MB")
        
        # Display timing information
        print("\n📋 Lyrics Timing Summary:")
        print("-" * 50)
        for i, lyric in enumerate(lyrics_with_timing):
            print(f"{i+1:2d}. {lyric['start_time']:5.1f}s - {lyric['end_time']:5.1f}s: {lyric['text'][:40]}{'...' if len(lyric['text']) > 40 else ''}")
        
        # Clean up
        video.close()
        audio_clip.close()
        
        return output_file
        
    except Exception as e:
        print(f"❌ Audio-synced video error: {e}")
        import traceback
        traceback.print_exc()
        return None

# Also fix the tempo formatting error in detect_vocal_segments function:
def detect_vocal_segments(audio_path, plot_analysis=False):
    """Use librosa to detect vocal segments and onsets - FIXED TEMPO FORMATTING"""
    try:
        print("🎤 Analyzing audio for vocal segments...")
        
        # Load audio file
        y, sr = librosa.load(audio_path, sr=None)
        duration = librosa.get_duration(y=y, sr=sr)
        
        print(f"📊 Audio loaded: {duration:.2f}s, SR: {sr}Hz")
        
        # Extract features for vocal detection
        print("📊 Extracting audio features...")
        
        # Harmonic-percussive source separation
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        
        # Detect onsets (places where vocals/instruments start)
        onset_frames = librosa.onset.onset_detect(
            y=y, 
            sr=sr, 
            hop_length=512, 
            backtrack=True,
            delta=0.1
        )
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
        
        # Detect beats for rhythm analysis - FIX TEMPO FORMATTING
        tempo_beat = librosa.beat.beat_track(y=y, sr=sr, hop_length=512)
        tempo = tempo_beat[0]  # Extract tempo value
        beat_frames = tempo_beat[1]  # Extract beat frames
        beat_times = librosa.frames_to_time(beat_frames, sr=sr, hop_length=512)
        
        # Vocal activity detection using spectral features
        stft = librosa.stft(y_harmonic)
        spectral_centroids = librosa.feature.spectral_centroid(y=y_harmonic, sr=sr)
        
        # FIX: Handle the shape of spectral_centroids properly
        print(f"📐 Spectral centroids shape: {spectral_centroids.shape}")
        
        # Flatten and normalize features
        spectral_flat = spectral_centroids.flatten()
        spectral_centroids_normalized = (spectral_flat - np.min(spectral_flat)) / (np.max(spectral_flat) - np.min(spectral_flat))
        
        # Create time array for features - FIXED
        times = librosa.times_like(spectral_centroids, sr=sr, hop_length=512)
        times_flat = times.flatten()
        
        print(f"📐 Times shape: {times.shape}, Normalized centroids shape: {spectral_centroids_normalized.shape}")
        
        # Detect vocal segments based on spectral features - FIXED
        vocal_segments = []
        current_segment = None
        vocal_threshold = 0.4  # Adjusted threshold
        
        # Ensure arrays are the same length
        min_length = min(len(times_flat), len(spectral_centroids_normalized))
        
        for i in range(min_length):
            time = times_flat[i]
            centroid = spectral_centroids_normalized[i]
            
            if centroid > vocal_threshold:
                if current_segment is None:
                    current_segment = {'start': time, 'end': time}
                else:
                    current_segment['end'] = time
            else:
                if current_segment is not None:
                    # Only keep segments longer than 0.5 seconds
                    if current_segment['end'] - current_segment['start'] > 0.5:
                        vocal_segments.append(current_segment)
                    current_segment = None
        
        # Add the last segment if exists
        if current_segment is not None and current_segment['end'] - current_segment['start'] > 0.5:
            vocal_segments.append(current_segment)
        
        # If no vocal segments detected, use a fallback approach
        if not vocal_segments:
            print("⚠️ No vocal segments detected, using onset-based segmentation")
            # Use onsets to create segments
            for i in range(len(onset_times) - 1):
                vocal_segments.append({
                    'start': onset_times[i],
                    'end': onset_times[i + 1]
                })
        
        # Plot analysis if requested
        if plot_analysis:
            plot_audio_analysis(y, sr, times_flat[:min_length], spectral_centroids_normalized[:min_length], onset_times, vocal_segments)
        
        print(f"✅ Detected {len(vocal_segments)} vocal segments")
        print(f"✅ Detected {len(onset_times)} musical onsets")
        print(f"🎵 Estimated tempo: {tempo:.1f} BPM")  # FIXED: Now tempo is a float
        
        return {
            'duration': duration,
            'onset_times': onset_times,
            'beat_times': beat_times,
            'vocal_segments': vocal_segments,
            'tempo': tempo
        }
        
    except Exception as e:
        print(f"❌ Audio analysis error: {e}")
        import traceback
        traceback.print_exc()
        # Return basic analysis with duration only
        return {
            'duration': 185,  # Fallback duration for Imagine
            'onset_times': [],
            'beat_times': [],
            'vocal_segments': [{'start': 5, 'end': 180}],  # Assume most of song has vocals
            'tempo': 120
        }

NameError: name 'songId' is not defined