In [1]:
# moviepy's star import stays first so the explicit imports below win any name clash
from moviepy.editor import *

import math
import os
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from sqlalchemy import create_engine

# Database connection (local MySQL server, music_development schema)
engine = create_engine('mysql+pymysql://root@localhost:3306/music_development')
data_path = '../data/'

# Look up the song, joined with its artist, by title.
# `rank` is back-quoted because RANK is a reserved word from MySQL 8.0.2 onwards;
# unquoted it is a syntax error on those servers. The result column is still named "rank".
sql = '''
SELECT S.id, `rank`, name, artist_id, A.first_name, A.last_name, youtube_code
FROM songs S
JOIN artists A
ON S.artist_id = A.id
WHERE name LIKE "Child (PH)"
'''
songs = pd.read_sql(sql, engine)
songs

Unnamed: 0,id,rank,name,artist_id,first_name,last_name,youtube_code
0,620,1128,Child (PH),336,Freddie,Aguilar,o-n6ok1uLjQ


In [2]:
# Primary key of the first matching song; later cells filter on this id
songId = songs['id'].iat[0]
songId

620

In [3]:
# Re-read the song by primary key so exactly one song is selected.
# `rank` is back-quoted because RANK is a reserved word from MySQL 8.0.2 onwards;
# unquoted it is a syntax error on those servers. The result column is still named "rank".
sql = f'''
SELECT S.id, `rank`, name, artist_id, A.first_name, A.last_name, youtube_code
FROM songs S
JOIN artists A
ON S.artist_id = A.id
WHERE S.id = {songId}
'''
song = pd.read_sql(sql, engine)
song

Unnamed: 0,id,rank,name,artist_id,first_name,last_name,youtube_code
0,620,1128,Child (PH),336,Freddie,Aguilar,o-n6ok1uLjQ


In [4]:
# Build "<first> <last>" as a plain string from the first row of the song lookup
artist_name = "{} {}".format(song['first_name'].iloc[0], song['last_name'].iloc[0])
print("üé§ Artist: {}".format(artist_name))

üé§ Artist: Freddie Aguilar


In [5]:
# Fetch the stored lyrics row(s) belonging to the selected song
sql = f"""
SELECT id, song_id, content 
FROM lyrics 
WHERE song_id = {songId}
"""
lyrics = pd.read_sql(sql=sql, con=engine)
lyrics

Unnamed: 0,id,song_id,content
0,546,620,1|[Music]\r\n29|When you were born into this w...


In [6]:
# Raw lyrics text of the first lyrics row
lyrics_content = lyrics['content'].iloc[0]

# Show it with real line breaks (the stored text uses \r\n), framed by divider lines
print(
    "Full lyrics content:",
    "-" * 50,
    lyrics_content.replace('\r\n', '\n'),
    "=" * 50,
    sep="\n",
)

Full lyrics content:
--------------------------------------------------
1|[Music]
29|When you were born into this world
33|Your mom and dad saw a dream fulfilled
36|Dream come true
38|The answer to their prayers

44|You were to them a special child
48|Gave them joy every time you smiled
51|Each time you cried
53|They're at your side to care
58|
58|Child, you don't know, you'll never know
61|How far they'd go, to give you all
65|Their love can give

71|To see you through and God it's true
74|They'd die for you, if they must
78|To see you live

84|How many seasons came and went
88|So many years have now been spent
91|For time ran fast
93|And now at last you're strong

101|Now what has gotten over you
104|You seem to hate your parents too
107|Speak out your mind
109|Why do you find them wrong

114|Child, you don't know, you'll never know
117|How far they'd go, to give you all
121|Their love can give

126|To see you through and God it's true
129|They die for you, if they must 
133|To see y

In [7]:
# begin of modified function
def parse_timestamp_lyrics(lyrics_text, default_last_duration=5):
    """Parse pipe-separated ``<seconds>|<text>`` lyric lines into timed entries.

    Blank lines are ignored. Lines without a pipe, or whose timestamp is not a
    finite number, are reported with a printed warning and skipped. A line with
    a timestamp but empty text (e.g. ``58|``) is kept as an entry with ``''``
    as its text.

    Args:
        lyrics_text: Raw lyrics, one ``timestamp|text`` entry per line
            (``\\n`` or ``\\r\\n`` line endings). ``None``, an empty string or
            a non-string value yields an empty list.
        default_last_duration: Seconds assigned to the final entry, which has
            no following timestamp to end on. Defaults to 5.

    Returns:
        A list of dicts sorted by timestamp (stable for equal timestamps), each
        with the keys ``timestamp``, ``text``, ``start_time``, ``end_time``,
        ``duration_seconds``, ``word_count``, ``spaces_count`` and ``char_count``.
    """
    # A NULL/empty lyrics column has nothing to parse; report "no lyrics"
    # instead of failing with AttributeError on .split().
    if not lyrics_text or not isinstance(lyrics_text, str):
        return []

    lyrics_with_timing = []

    for raw_line in lyrics_text.split('\n'):
        line = raw_line.strip()  # also removes the '\r' left over from '\r\n'
        if not line:
            continue

        if '|' not in line:
            print(f"‚ö†Ô∏è Skipping line without Pipe separator: {line}")
            continue

        # Split on the first pipe only, so the lyric text may itself contain pipes
        timestamp_str, text = line.split('|', 1)
        try:
            timestamp = float(timestamp_str.strip())
            # float() accepts "nan" and "inf"; such values would corrupt the
            # sort order and the end-time arithmetic below.
            if not math.isfinite(timestamp):
                raise ValueError("non-finite timestamp")
        except ValueError:
            print(f"‚ö†Ô∏è Skipping line with invalid timestamp: {line}")
            continue

        text = text.strip()  # case is preserved as written
        word_count = len(text.split())
        lyrics_with_timing.append({
            'timestamp': timestamp,
            'text': text,
            'start_time': timestamp,
            'end_time': None,  # filled in once the entries are sorted
            'duration_seconds': None,  # filled in once the entries are sorted
            'word_count': word_count,
            'spaces_count': max(0, word_count - 1),  # gaps between words
            'char_count': len(text)  # total characters including spaces
        })

    # Sort by timestamp so each entry can end where the next one starts
    lyrics_with_timing.sort(key=lambda entry: entry['timestamp'])

    last_index = len(lyrics_with_timing) - 1
    for index, entry in enumerate(lyrics_with_timing):
        if index < last_index:
            entry['end_time'] = lyrics_with_timing[index + 1]['timestamp']
        else:
            # The final line has no successor; keep it for a fixed default time
            entry['end_time'] = entry['timestamp'] + default_last_duration
        entry['duration_seconds'] = entry['end_time'] - entry['start_time']

    return lyrics_with_timing
# end of modified function
    
def get_current_lyric(current_time, lyrics_with_timing):
    """Return the text of the first entry active at ``current_time``, or None.

    An entry is active when ``start_time <= current_time < end_time``, so the
    end of the window is exclusive and zero-length entries never match.
    """
    active_texts = (
        entry['text']
        for entry in lyrics_with_timing
        if entry['start_time'] <= current_time < entry['end_time']
    )
    return next(active_texts, None)

def _load_background_image(image_path, size, fallback_color=(40, 40, 80)):
    """Return an RGB PIL image of ``size`` (width, height): the resized picture at ``image_path``, or a solid fill."""
    if os.path.exists(image_path):
        # Newer Pillow groups the filters under Image.Resampling; older releases expose them on Image
        lanczos = getattr(Image, 'Resampling', Image).LANCZOS
        try:
            # The context manager releases the file handle once the pixels are copied out
            with Image.open(image_path) as source_image:
                # convert('RGB') keeps frames 3-channel for grayscale/RGBA/CMYK artwork
                return source_image.convert('RGB').resize(size, lanczos)
        except OSError as e:
            print(f"‚ö†Ô∏è Could not read background image, using solid color: {e}")
    return Image.new('RGB', size, fallback_color)


def _load_lyric_font(size=32):
    """Return Arial at ``size`` points if it can be loaded, otherwise Pillow's built-in default font."""
    for candidate in ("arial.ttf", "C:/Windows/Fonts/arial.ttf"):
        try:
            return ImageFont.truetype(candidate, size)
        except (OSError, ImportError):
            # Font file missing/unreadable or FreeType support unavailable: try the next option
            continue
    return ImageFont.load_default()


def _safe_filename(name):
    """Replace characters that are not allowed in file names (``<>:"/\\|?*``) with underscores."""
    return name.translate(str.maketrans({ch: '_' for ch in '<>:"/\\|?*'}))


def create_timestamp_synced_video(song_id=songId, max_duration=None,
                                  audio_root=r"C:\ruby\music\public\uploads\song\location",
                                  output_dir='../data/videos'):
    """Render an MP4 whose on-screen lyrics follow the stored ``seconds|text`` timestamps.

    Reads the song, its lyrics and its artist through the module-level
    ``engine``, loads the audio file from ``<audio_root>/<song_id>/``, draws the
    active lyric line near the bottom of a 640x480 frame (background
    ``Folder.jpg`` from the same folder, or a solid color) and writes the
    result with libx264/aac at 24 fps.

    Args:
        song_id: Primary key of the song. NOTE: the default is the value
            ``songId`` had when this definition was executed, not its value at
            call time; pass ``song_id`` explicitly after selecting another song.
        max_duration: Optional limit in seconds. ``None``, ``0`` or a negative
            value renders the full track.
        audio_root: Directory containing one sub-folder per song id.
        output_dir: Directory the video is written to (created if missing).

    Returns:
        The path of the written video, or ``None`` when the song, audio file or
        lyrics are unavailable or any error occurs (errors are printed, not raised).
        Full renders are named ``<song>_<artist>.mp4``; limited renders get a
        ``_<max_duration>s`` suffix so a preview never overwrites the full video.
    """
    audio_clip = None
    video = None

    try:
        # int() rejects anything that is not an integer id before it is interpolated into SQL
        song_id = int(song_id)

        # Get song data
        query = f"""
        SELECT s.name as song_name, s.location as audio_file,
               l.content as lyrics, a.first_name, a.last_name
        FROM songs s 
        JOIN lyrics l ON s.id = l.song_id 
        JOIN artists a ON s.artist_id = a.id 
        WHERE s.id = {song_id}
        """

        df = pd.read_sql(query, engine)
        if df.empty:
            # The inner joins return nothing when the song, its lyrics or its artist is missing
            print(f"‚ùå No song with lyrics found for id {song_id}")
            return None
        song_data = df.iloc[0]

        print(f"üéµ Creating TIMESTAMP-SYNCED video for: {song_data['song_name']}")

        if not song_data['audio_file']:
            print("‚ùå Audio file not found")
            return None

        # Construct file paths
        audio_dir = os.path.join(audio_root, str(song_id))
        audio_path = os.path.join(audio_dir, song_data['audio_file'])
        background_image_path = os.path.join(audio_dir, "Folder.jpg")

        print(f"üîä Audio: {os.path.basename(audio_path)}")
        print(f"üñºÔ∏è Background: {os.path.basename(background_image_path)}")

        if not os.path.exists(audio_path):
            print("‚ùå Audio file not found")
            return None

        # Load audio clip to get duration
        audio_clip = AudioFileClip(audio_path)
        full_duration = audio_clip.duration

        # Only a positive limit shortens the render; a negative one would give a negative length
        limit_duration = max_duration is not None and max_duration > 0
        if limit_duration:
            duration = min(max_duration, full_duration)
            print(f"‚è±Ô∏è Using LIMITED duration: {duration:.1f}s (max_duration={max_duration}s)")
        else:
            duration = full_duration
            print(f"‚è±Ô∏è Using FULL duration: {duration:.1f}s")

        # Trim audio if needed
        if limit_duration and full_duration > max_duration:
            audio_clip = audio_clip.subclip(0, duration)

        # Parse timestamp-based lyrics
        lyrics_with_timing = parse_timestamp_lyrics(song_data['lyrics'])

        if not lyrics_with_timing:
            print("‚ùå Could not parse timestamp lyrics")
            return None

        print(f"üìù Parsed {len(lyrics_with_timing)} timestamped lyrics lines")

        # Keep only the lyrics that start inside the limited duration
        if limit_duration:
            lyrics_with_timing = [lyric for lyric in lyrics_with_timing if lyric['start_time'] < duration]
            if lyrics_with_timing and lyrics_with_timing[-1]['end_time'] > duration:
                last_lyric = lyrics_with_timing[-1]
                last_lyric['end_time'] = duration
                # Keep the reported duration in step with the clamped end time
                last_lyric['duration_seconds'] = last_lyric['end_time'] - last_lyric['start_time']

        print(f"üìù Final timing: {len(lyrics_with_timing)} lyrics lines")
        print(f"‚è±Ô∏è Video duration: {duration:.1f}s ({duration/60:.1f} minutes)")

        # Timing table: start - end: text | duration | words | spaces | chars
        print("\nüìã Parsed Lyrics Timing:")
        print("-" * 80)
        for i, lyric in enumerate(lyrics_with_timing):
            # Truncate to 40 characters plus an ellipsis so the columns stay aligned
            display_text = lyric['text'][:40] + ('...' if len(lyric['text']) > 40 else '')

            print(f"{i+1:2d}. {lyric['start_time']:5.1f}s - {lyric['end_time']:5.1f}s: "
                  f"{display_text:<43} | "
                  f"{lyric['duration_seconds']:4.1f}s | "
                  f"{lyric['word_count']:2d} | "
                  f"{lyric['spaces_count']:2d} | "
                  f"{lyric['char_count']:3d}")

        # Video settings
        fps = 24
        width, height = 640, 480
        text_color = (255, 255, 255)  # white
        text_y = height - 50  # baseline near the bottom of the frame

        # Background and font are the same for every frame: load them once, not per frame
        background_image = _load_background_image(background_image_path, (width, height))
        font = _load_lyric_font(32)

        def make_synced_frame(t):
            """Return the RGB frame for time ``t`` (seconds) with the active lyric drawn on it."""
            try:
                # Draw on a copy so the shared background is never modified
                pil_img = background_image.copy()

                current_line = get_current_lyric(t, lyrics_with_timing)
                if current_line:
                    draw = ImageDraw.Draw(pil_img)
                    try:
                        bbox = draw.textbbox((0, 0), current_line, font=font)
                    except AttributeError:
                        # Older Pillow without textbbox: fall back to the legacy textsize API
                        legacy_width, legacy_height = draw.textsize(current_line, font=font)
                        bbox = (0, 0, legacy_width, legacy_height)

                    # Center horizontally; vertical position is fixed near the bottom
                    text_width = bbox[2] - bbox[0]
                    x = (width - text_width) // 2
                    draw.text((x, text_y), current_line, font=font, fill=text_color)

                return np.array(pil_img)

            except Exception as e:
                # Best effort: a failing frame becomes black instead of aborting the render
                print(f"‚ùå Frame error at {t:.1f}s: {e}")
                return np.zeros((height, width, 3), dtype=np.uint8)

        # Create video
        print("üé¨ Creating timestamp-synced video frames...")
        video = VideoClip(make_synced_frame, duration=duration)
        video = video.set_audio(audio_clip)

        # Export
        os.makedirs(output_dir, exist_ok=True)

        # Limited renders carry their duration in the name; full renders keep the plain name
        duration_suffix = f"_{max_duration}s" if limit_duration else ""
        clean_artist_name = f"{song_data['first_name']} {song_data['last_name']}"
        base_name = _safe_filename(f"{song_data['song_name']}_{clean_artist_name}{duration_suffix}")
        output_file = os.path.join(output_dir, f"{base_name}.mp4")

        print("üìπ Exporting video...")
        video.write_videofile(
            output_file,
            fps=fps,
            codec='libx264',
            audio_codec='aac',
            verbose=False,
            logger=None
        )

        print(f"‚úÖ TIMESTAMP-SYNCED VIDEO CREATED: {output_file}")
        print(f"üìä File size: {os.path.getsize(output_file) / (1024*1024):.1f} MB")

        return output_file

    except Exception as e:
        print(f"‚ùå Video creation error: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Release the clips on every exit path: success, early return or error
        for clip in (video, audio_clip):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_error:
                print(f"‚ö†Ô∏è Could not close clip cleanly: {close_error}")

# Entry point: render the full-length video for the selected song and report the outcome
if __name__ == "__main__":
    print("=" * 70)
    print("üé¨ CREATING TIMESTAMP-SYNCED VIDEO")
    print("=" * 70)

    result = create_timestamp_synced_video(song_id=songId, max_duration=None)

    if not result:
        print("\n‚ùå Timestamp-synced video creation failed")
    else:
        print(f"\nüéâ SUCCESS! Timestamp-synced video created: {result}")
        print("\n‚ú® Features:")
        print("\n".join((
            "   ‚úÖ YouTube transcript timestamp parsing",
            "   ‚úÖ Pipe separator for timestamps",
            "   ‚úÖ No complex audio analysis needed",
            "   ‚úÖ Perfect synchronization with original timestamps",
        )))

üé¨ CREATING TIMESTAMP-SYNCED VIDEO
üéµ Creating TIMESTAMP-SYNCED video for: Child (PH)
üîä Audio: Freddie_Aguilar_-_Child__PH_.mp3
üñºÔ∏è Background: Folder.jpg
‚è±Ô∏è Using FULL duration: 234.2s
üìù Parsed 50 timestamped lyrics lines
üìù Final timing: 50 lyrics lines
‚è±Ô∏è Video duration: 234.2s (3.9 minutes)

üìã Parsed Lyrics Timing:
--------------------------------------------------------------------------------
 1.   1.0s -  29.0s: [Music]                                     | 28.0s |  1 |  0 |   7
 2.  29.0s -  33.0s: When you were born into this world          |  4.0s |  7 |  6 |  34
 3.  33.0s -  36.0s: Your mom and dad saw a dream fulfilled      |  3.0s |  8 |  7 |  38
 4.  36.0s -  38.0s: Dream come true                             |  2.0s |  3 |  2 |  15
 5.  38.0s -  44.0s: The answer to their prayers                 |  6.0s |  5 |  4 |  27
 6.  44.0s -  48.0s: You were to them a special child            |  4.0s |  7 |  6 |  32
 7.  48.0s -  51.0s: Gave them joy ev