In [1]:
from moviepy.editor import *
from sqlalchemy import create_engine
from datetime import datetime
import pandas as pd
import os
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt

# Database connection: SQLAlchemy engine for the `music_development` MySQL
# database on localhost:3306, using the PyMySQL driver and user `root`
# (the URL carries no password).
engine = create_engine('mysql+pymysql://root@localhost:3306/music_development')
# Relative data directory; not referenced again in the visible code.
data_path = '../data/'

# Get song data: all columns of the songs rows whose name matches the title.
# NOTE: the LIKE pattern contains no wildcard characters.
sql = '''
SELECT * FROM songs
WHERE name LIKE "The Older I Get"
'''
songs = pd.read_sql(sql, engine)
# Take the id of the first matching row; this fails if the query returned no rows.
songId = songs['id'].iloc[0]

# Get lyrics rows for that song. songId is interpolated directly into the SQL
# text; it comes from the query result above.
sql = f"""
SELECT id, song_id, content 
FROM lyrics 
WHERE song_id = {songId}
"""
lyrics = pd.read_sql(sql, engine)
# Get the lyrics content of the first lyrics row (fails if the song has no lyrics row)
lyrics_content = lyrics.content.iloc[0]

# Print the lyrics with Windows line endings (\r\n) normalised to plain \n
print("Full lyrics content:")
print("=" * 50)
print(lyrics_content.replace('\r\n', '\n'))

def parse_timestamp_lyrics(lyrics_text):
    """Parse ``seconds:text`` lyric lines into timed, upper-cased entries.

    Every non-blank line is split at its first colon. The part before the
    colon must parse as a finite number of seconds; the rest is the lyric
    text. Lines without a colon, or whose prefix is not a finite number, are
    reported on stdout and skipped.

    Args:
        lyrics_text: Lyrics with one ``<seconds>:<text>`` entry per line.
            ``None`` or an empty string yields an empty list.

    Returns:
        A list of dicts sorted by time. Each dict has the keys ``timestamp``
        and ``start_time`` (seconds as float), ``text`` (stripped and
        upper-cased) and ``end_time`` (the next entry's timestamp, or
        start + 5 seconds for the last entry).
    """
    import math  # local import: only this parser needs it

    if not lyrics_text:
        return []

    lyrics_with_timing = []

    for raw_line in lyrics_text.split('\n'):
        line = raw_line.strip()  # also removes a trailing '\r' from CRLF input
        if not line:
            continue

        if ':' not in line:
            print(f"⚠️ Skipping line without colon separator: {line}")
            continue

        # Split by the first colon only, so the lyric text may contain colons
        timestamp_str, text = line.split(':', 1)
        try:
            timestamp = float(timestamp_str.strip())
        except ValueError:
            print(f"⚠️ Skipping line with invalid timestamp: {line}")
            continue

        # float() also accepts "nan" / "inf"; such values cannot be placed on
        # a timeline and NaN makes the sort order below undefined.
        if not math.isfinite(timestamp):
            print(f"⚠️ Skipping line with invalid timestamp: {line}")
            continue

        lyrics_with_timing.append({
            'timestamp': timestamp,
            'text': text.strip().upper(),
            'start_time': timestamp,
            'end_time': None,  # filled in below once the order is known
        })

    # Sort by timestamp so each entry can end where the next one starts
    lyrics_with_timing.sort(key=lambda entry: entry['timestamp'])

    for current, following in zip(lyrics_with_timing, lyrics_with_timing[1:]):
        current['end_time'] = following['timestamp']

    if lyrics_with_timing:
        # The last line has no successor: give it a default 5 second duration
        last = lyrics_with_timing[-1]
        last['end_time'] = last['timestamp'] + 5

    return lyrics_with_timing

def get_current_lyric(current_time, lyrics_with_timing):
    """Return the text of the first lyric active at ``current_time``.

    A lyric is active while ``start_time <= current_time < end_time``.
    Returns ``None`` when no entry covers the given time.
    """
    active_texts = (
        entry['text']
        for entry in lyrics_with_timing
        if entry['start_time'] <= current_time < entry['end_time']
    )
    # The generator is lazy, so scanning stops at the first match
    return next(active_texts, None)

def _load_lyric_font(size=32):
    """Return Arial at ``size`` points, or Pillow's default font if unavailable."""
    for candidate in ("arial.ttf", "C:/Windows/Fonts/arial.ttf"):
        try:
            return ImageFont.truetype(candidate, size)
        except (OSError, ImportError):
            # Font file not found / FreeType support missing: try the next option
            continue
    return ImageFont.load_default()


def _load_background(image_path, width, height):
    """Return an RGB PIL image of ``(width, height)`` to draw the lyrics on."""
    if os.path.exists(image_path):
        try:
            with Image.open(image_path) as source_image:
                # Force RGB so every frame is a (height, width, 3) array
                # regardless of the file's own mode (grayscale, RGBA, ...).
                return source_image.convert("RGB").resize(
                    (width, height), Image.Resampling.LANCZOS
                )
        except OSError as e:
            print(f"⚠️ Could not load background image, using solid colour: {e}")
    return Image.new("RGB", (width, height), (40, 40, 80))


def _render_lyric_frame(background, font, text):
    """Return a uint8 RGB frame: ``background`` with ``text`` centred on it."""
    frame_img = background.copy()

    if text:
        width, height = frame_img.size
        # "RGBA" draw mode blends colours that carry an alpha value into the
        # RGB image, which is what makes the text box semi-transparent.
        draw = ImageDraw.Draw(frame_img, "RGBA")

        try:
            bbox = draw.textbbox((0, 0), text, font=font)
        except AttributeError:
            # Pillow versions without textbbox
            legacy_width, legacy_height = draw.textsize(text, font=font)
            bbox = (0, 0, legacy_width, legacy_height)

        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        x = (width - text_width) // 2
        y = (height - text_height) // 2

        # Semi-transparent background for text
        padding = 10
        draw.rectangle(
            [x - padding, y - padding,
             x + text_width + padding, y + text_height + padding],
            fill=(0, 0, 0, 180),
        )

        # Shadow first, then the main text on top for readability
        draw.text((x + 2, y + 2), text, font=font, fill=(0, 0, 0))
        draw.text((x, y), text, font=font, fill=(255, 255, 255))

    return np.array(frame_img)


def create_timestamp_synced_video(song_id=songId, max_duration=None, enable_manual_correction=True):
    """Create a lyrics video for a song from its timestamped lyrics.

    Looks up the song, its lyrics and its artist in the database, loads the
    audio file from the song's upload directory, and renders one frame per
    time step showing the lyric that is active at that time on top of
    ``Folder.jpg`` from the same directory (or a solid colour if that image is
    missing or unreadable). The result is written as an MP4 under
    ``../data/videos``.

    Args:
        song_id: Integer id of the song row; it is converted with ``int()``
            before being placed into the SQL text.
        max_duration: Optional limit in seconds. When truthy, audio, video and
            lyrics are cut off at ``min(max_duration, audio duration)``.
        enable_manual_correction: When true, the parsed timings are passed
            through ``manual_lyrics_timing_correction`` before rendering.

    Returns:
        The path of the written video file, or ``None`` when the song or its
        audio file is missing, the lyrics cannot be parsed, or any other
        error occurs (the error is printed with a traceback).
    """
    audio_clip = None
    video = None

    try:
        # Only an integer id may reach the SQL text below
        song_id = int(song_id)

        # Get song data
        query = f"""
        SELECT s.name as song_name, s.location as audio_file,
               l.content as lyrics, a.first_name, a.last_name
        FROM songs s
        JOIN lyrics l ON s.id = l.song_id
        JOIN artists a ON s.artist_id = a.id
        WHERE s.id = {song_id}
        """

        df = pd.read_sql(query, engine)
        if df.empty:
            print(f"❌ No song with lyrics and artist found for id {song_id}")
            return None
        song_data = df.iloc[0]

        print(f"🎵 Creating TIMESTAMP-SYNCED video for: {song_data['song_name']}")

        # Construct file paths
        audio_dir = os.path.join(r"C:\ruby\music\public\uploads\song\location", str(song_id))
        audio_path = os.path.join(audio_dir, song_data['audio_file'])
        background_image_path = os.path.join(audio_dir, "Folder.jpg")

        print(f"🔊 Audio: {os.path.basename(audio_path)}")
        print(f"🖼️ Background: {os.path.basename(background_image_path)}")

        if not os.path.exists(audio_path):
            print("❌ Audio file not found")
            return None

        # Load audio clip to get duration
        audio_clip = AudioFileClip(audio_path)
        full_duration = audio_clip.duration

        # Apply duration limit
        if max_duration:
            duration = min(max_duration, full_duration)
            print(f"⏱️ Using LIMITED duration: {duration:.1f}s (max_duration={max_duration}s)")
        else:
            duration = full_duration
            print(f"⏱️ Using FULL duration: {duration:.1f}s")

        # Trim audio if needed
        if max_duration and full_duration > max_duration:
            audio_clip = audio_clip.subclip(0, duration)

        # Parse timestamp-based lyrics
        lyrics_with_timing = parse_timestamp_lyrics(song_data['lyrics'])

        if not lyrics_with_timing:
            print("❌ Could not parse timestamp lyrics")
            return None

        print(f"📝 Parsed {len(lyrics_with_timing)} timestamped lyrics lines")

        # MANUAL CORRECTION STEP (optional)
        if enable_manual_correction:
            lyrics_with_timing = manual_lyrics_timing_correction(lyrics_with_timing, duration)

        # Keep only lyrics that start within the limited duration
        if max_duration:
            lyrics_with_timing = [lyric for lyric in lyrics_with_timing if lyric['start_time'] < duration]
            # Clamp the end time of the last lyric to the limited duration
            if lyrics_with_timing and lyrics_with_timing[-1]['end_time'] > duration:
                lyrics_with_timing[-1]['end_time'] = duration

        print(f"📝 Final timing: {len(lyrics_with_timing)} lyrics lines")
        print(f"⏱️ Video duration: {duration:.1f}s ({duration/60:.1f} minutes)")

        # Video settings
        fps = 24
        width, height = 640, 480

        # Background and font are identical for every frame: load them once
        # here instead of once per rendered frame.
        background = _load_background(background_image_path, width, height)
        font = _load_lyric_font(32)

        def make_synced_frame(t):
            try:
                current_line = get_current_lyric(t, lyrics_with_timing)
                return _render_lyric_frame(background, font, current_line)
            except Exception as e:
                # A single bad frame must not abort the export: emit black
                print(f"❌ Frame error at {t:.1f}s: {e}")
                return np.zeros((height, width, 3), dtype=np.uint8)

        # Create video
        print("🎬 Creating timestamp-synced video frames...")
        video = VideoClip(make_synced_frame, duration=duration)
        video = video.set_audio(audio_clip)

        # Export
        output_dir = '../data/videos'
        os.makedirs(output_dir, exist_ok=True)

        # Include correction and duration settings in the filename
        correction_suffix = "_corrected" if enable_manual_correction else ""
        duration_suffix = f"_{max_duration}s" if max_duration else "_full"

        output_file = os.path.join(output_dir, f"{song_data['song_name']}_timestamp_synced{correction_suffix}{duration_suffix}.mp4")

        print("📹 Exporting video...")
        video.write_videofile(
            output_file,
            fps=fps,
            codec='libx264',
            audio_codec='aac',
            verbose=False,
            logger=None
        )

        print(f"✅ TIMESTAMP-SYNCED VIDEO CREATED: {output_file}")
        print(f"📊 File size: {os.path.getsize(output_file) / (1024*1024):.1f} MB")

        # Display final timing
        print("\n📋 FINAL Lyrics Timing:")
        print("-" * 50)
        for i, lyric in enumerate(lyrics_with_timing):
            print(f"{i+1:2d}. {lyric['start_time']:5.1f}s - {lyric['end_time']:5.1f}s: {lyric['text'][:40]}{'...' if len(lyric['text']) > 40 else ''}")

        return output_file

    except Exception as e:
        print(f"❌ Video creation error: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Clean up on every exit path, including early returns and errors
        for clip in (video, audio_clip):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_error:
                print(f"⚠️ Could not close clip cleanly: {close_error}")

def manual_lyrics_timing_correction(lyrics_with_timing, song_duration):
    """Interactively let the user override lyric start times.

    Prints the current timing, asks on stdin whether to adjust it, and then
    reads ``line_number,start_time`` pairs until ``done`` (or an empty line).
    ``skip`` discards everything entered so far. If stdin is unavailable or
    runs out, prompting stops instead of retrying.

    Args:
        lyrics_with_timing: List of lyric dicts with ``text``, ``start_time``
            and ``end_time`` keys. The list itself is not modified.
        song_duration: Upper bound in seconds for an accepted start time.

    Returns:
        ``lyrics_with_timing`` unchanged when the user declines or skips; a
        shallow copy of it when no correction was entered; otherwise the
        result of ``apply_start_time_corrections``.
    """
    import math  # local import: only needed for the finite-number check

    # What input() can raise when there is no usable interactive stdin
    # (closed/exhausted stream, stdin set to None, front-ends without stdin).
    no_stdin_errors = (EOFError, OSError, RuntimeError, NotImplementedError)

    print("\n" + "="*60)
    print("🎵 MANUAL LYRICS TIMING CORRECTION (START TIMES)")
    print("="*60)
    print("Current timestamp timing:")
    print("-" * 50)

    # Display current timing
    for i, lyric in enumerate(lyrics_with_timing):
        print(f"{i+1:2d}. {lyric['start_time']:5.1f}s - {lyric['end_time']:5.1f}s: {lyric['text'][:40]}{'...' if len(lyric['text']) > 40 else ''}")

    print(f"\nSong duration: {song_duration:.1f}s")
    print("\nWould you like to manually adjust the START TIMES? (y/n): ")

    try:
        response = input().strip().lower()
    except no_stdin_errors:
        print("Using timestamp timing...")
        return lyrics_with_timing

    if response not in ('y', 'yes'):
        print("Using timestamp timing...")
        return lyrics_with_timing

    print("\n📝 MANUAL TIMING INSTRUCTIONS:")
    print("Enter timing in format: 'line_number,start_time'")
    print("Example: '1,5.5' means line 1 starts at 5.5 seconds")
    print("Example: '2,12.8' means line 2 starts at 12.8 seconds")
    print("Enter 'done' when finished, or 'skip' to use timestamp timing")
    print("-" * 50)

    corrected_timing = list(lyrics_with_timing)
    user_start_times = {}

    while True:
        try:
            user_input = input("\nEnter timing (line_number,start_time) or 'done': ").strip()
        except no_stdin_errors:
            # No more input will ever arrive; retrying would loop forever,
            # so behave as if the user had typed 'done'.
            print("\n⚠️ Input ended; finishing with the corrections entered so far.")
            break

        command = user_input.lower()
        if command in ('done', ''):
            break
        if command == 'skip':
            print("Using timestamp timing...")
            return lyrics_with_timing

        # Parse input
        parts = user_input.split(',')
        if len(parts) != 2:
            print("❌ Invalid format. Use: 'line_number,start_time'")
            continue

        try:
            line_num = int(parts[0].strip())
            start_time = float(parts[1].strip())
        except ValueError as e:
            print(f"❌ Invalid input: {e}")
            continue

        # Validate line number
        if line_num < 1 or line_num > len(corrected_timing):
            print(f"❌ Line number must be between 1 and {len(corrected_timing)}")
            continue

        # Validate start time; NaN compares false against both bounds, so it
        # has to be rejected explicitly.
        if not math.isfinite(start_time) or start_time < 0 or start_time > song_duration:
            print(f"❌ Start time must be between 0 and {song_duration:.1f}")
            continue

        # Store user input
        user_start_times[line_num] = start_time
        line_index = line_num - 1

        print(f"✅ Set line {line_num} to start at {start_time:.1f}s: {corrected_timing[line_index]['text'][:30]}...")

        # Show current progress; an arrow marks lines the user has overridden
        print("\nCurrent manual timing:")
        for i, lyric in enumerate(corrected_timing):
            marker = " ←" if (i+1) in user_start_times else ""
            user_start = user_start_times.get(i+1, lyric['start_time'])
            print(f"{i+1:2d}. {user_start:5.1f}s : {lyric['text'][:30]}...{marker}")

    # Apply all user corrections at once
    if user_start_times:
        corrected_timing = apply_start_time_corrections(corrected_timing, user_start_times, song_duration)
        print("\n✅ Manual timing correction completed!")
    else:
        print("\n⏩ No corrections made, using timestamp timing...")

    return corrected_timing

def apply_start_time_corrections(lyrics_with_timing, user_start_times, song_duration):
    """Build a new timing list from user overrides.

    ``user_start_times`` maps 1-based line numbers to start times. Each line
    starts at its override if one exists, otherwise at its existing
    ``start_time``. Each line ends where the following line starts (override
    or existing start); the final line ends at ``song_duration``. The
    returned dicts contain only ``text``, ``start_time`` and ``end_time``.
    """

    def effective_start(position):
        # Line numbers shown to the user are 1-based, list positions 0-based
        number = position + 1
        if number in user_start_times:
            return user_start_times[number]
        return lyrics_with_timing[position]['start_time']

    final_position = len(lyrics_with_timing) - 1

    return [
        {
            'text': entry['text'],
            'start_time': effective_start(position),
            'end_time': (
                song_duration
                if position == final_position
                else effective_start(position + 1)
            ),
        }
        for position, entry in enumerate(lyrics_with_timing)
    ]

# Script entry point: render the full-length video without the interactive
# timing-correction step and report the outcome.
if __name__ == "__main__":
    print("=" * 70, "🎬 CREATING TIMESTAMP-SYNCED VIDEO", "=" * 70, sep="\n")

    result = create_timestamp_synced_video(song_id=songId, max_duration=None, enable_manual_correction=False)

    if not result:
        print("\n❌ Timestamp-synced video creation failed")
    else:
        # One print call; joining with newlines yields the same output as
        # printing each line separately.
        print("\n".join((
            f"\n🎉 SUCCESS! Timestamp-synced video created: {result}",
            "\n✨ Features:",
            "   ✅ YouTube transcript timestamp parsing",
            "   ✅ Manual timing correction available",
            "   ✅ No complex audio analysis needed",
            "   ✅ Perfect synchronization with original timestamps",
        )))

Full lyrics content:
18:the older i get
22:the more i think
26:you only get a minute
29:better live while you're in it
30:cause it's gone in a blink
34:and the older i get
38:the truer it is
42:it's the people you love
46:not the money and stuff
47:that makes you rich
51:and if they found a fountain of youth
54:i wouldn't drink a drop
58:and that's the truth
60:funny how it feels i'm just getting to
63:my best years yet
68:the older i get
72:fewer friends i have
76:but you don't need a lot and the ones
79:that you've got
80:have always got your back
84:and the older i get
89:the better i am
93:and knowing when to give
96:and when to just not give a damn
101:and if they found a fountain of youth
105:i wouldn't drink a drop
109:and that's the truth
110:funny how it feels i'm just getting to
114:my best years yet
118:the older i get
152:and i don't mind all the lies
156:from all the times i've
158:laughed and cried
162:souvenirs and little signs
165:of the life i've lived
170:the older i 