In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file before any later cell reads them;
# process_album looks up PERPLEXITY_API_KEY through os.getenv.
load_dotenv()

True

In [3]:
import os
import json
import shutil
import magic
import re
import sys
from opencc import OpenCC
from openai import OpenAI
from pydantic import BaseModel
from typing import List, Dict
from natsort import natsorted

# Pydantic model whose JSON schema is passed to the chat-completions call in
# process_album (response_format -> json_schema) to describe the reply shape.
# NOTE(review): documented with comments rather than a class docstring because
# a docstring is presumably emitted into model_json_schema() as "description",
# which would change the schema sent to the API - confirm before adding one.
class RenameSchema(BaseModel):
    # Maps each original filename to the new filename proposed for it.
    old_to_new: Dict[str, str]


class AudioFileValidator:
    """Extension-based filter that accepts only known audio formats."""

    # Recognised audio suffixes: lower-case, including the leading dot.
    AUDIO_EXTENSIONS = {
        '.dsd', '.dff', '.dsf',
        '.wav', '.aiff', '.aif',
        '.flac', '.alac',
        '.dts', '.thd', '.mlp', '.mqa',
        '.tak', '.ape',
        '.mp3', '.aac', '.m4a', '.ogg', '.wma',
    }

    def is_audio_file(self, filename: str) -> bool:
        """Return True when *filename* ends in a known audio extension.

        Only the name is inspected (case-insensitively); the file itself is
        never opened.
        """
        _, suffix = os.path.splitext(filename)
        return suffix.lower() in self.AUDIO_EXTENSIONS

# Pydantic model used by process_album to parse and validate the JSON text
# returned by the model. It declares the same single field as RenameSchema.
class FileRenameMap(BaseModel):
    # Direct filename mapping: original filename -> new filename.
    old_to_new: Dict[str, str]

def clean_filename(name: str) -> str:
    """Delete the characters < > : " / backslash | ? * from *name*.

    Every other character is kept, in its original position.
    """
    forbidden = str.maketrans('', '', '<>:"/\\|?*')
    return name.translate(forbidden)

# System prompt sent verbatim to the model for every album.
_SYSTEM_PROMPT = (
    "You are a music file alignment specialist. Follow this precise workflow:\n\n"

    "**Step 1: Filename Analysis**\n"
    "- For each filename, extract ANY existing track number using pattern matching\n"
    "- Note: Track numbers may be prefixed (01 Song), suffixed (Song 01), or missing\n\n"

    "**Step 2: Official Tracklist Verification**\n"
    "1. Search MusicBrainz/Discogs for OFFICIAL tracklist using album name\n"
    "2. Cross-reference with streaming platforms (Spotify/Apple Music)\n"
    "3. Confirm language: Traditional Chinese for Chinese content, English otherwise\n\n"

    "**Step 3: Track Alignment & Renaming**\n"
    "- For each file, determine the official track name and number by searching reputable sources such as MusicBrainz, Discogs, YesAsia, or official artist/label websites. Use Google if needed. Search for the album’s official tracklist.\n"
    "- STRICT RULE: The renamed file must correspond to the actual song content, not just the filename. Do not guess based on filename alone-verify using track duration, lyrics, or other metadata if possible.\n"
    "- If the official track number/order is found, use a 2-digit zero-padded number (e.g., 01, 02) as a prefix in the new filename, matching the album’s official order.\n"
    "- If NO official track number/order can be found for a song, do NOT include any number in the new filename-just use the clean official track title.\n"
    "- Always preserve the original file extension.\n"
    "\n"
    "**Examples:**\n"
    "1. Official tracklist: 01 Happy Birthday, 02 Merry Christmas, 03 Little Tea Pot\n"
    "   Input: ['Merry Christmas.flac', 'Happy Birthday.flac', '03 Little Tea Pot.flac']\n"
    "   Output: {\n"
    "     'Merry Christmas.flac': '02 Merry Christmas.flac',\n"
    "     'Happy Birthday.flac': '01 Happy Birthday.flac',\n"
    "     '03 Little Tea Pot.flac': '03 Little Tea Pot.flac'\n"
    "   }\n"
    "\n"
    "2. Official tracklist: 青春修練手冊, 魔法城堡, 寵愛 (no numbers published)\n"
    "   Input: ['Track 1.mp3', 'Magic Castle.flac', '宠爱.ape']\n"
    "   Output: {\n"
    "     'Track 1.mp3': '青春修練手冊.mp3',\n"
    "     'Magic Castle.flac': '魔法城堡.flac',\n"
    "     '宠爱.ape': '寵愛.ape'\n"
    "   }\n"
    "\n"
    "3. If you cannot verify the official order for a song, do NOT invent a number-just use the official title.\n"
    "\n"
    # The trailing "\n\n" keeps this sentence from running straight into the
    # "Step 4" heading below.
    "THIS IS CRITICAL: Only assign numbers if you can confirm the official track order from at least one reputable source. Otherwise, use the clean title only.\n\n"

    "**Step 4: Validation Checks**\n"
    "- Ensure ALL original files are mapped 1:1\n"
    "- Verify extensions remain identical (.dsf → .dsf)\n"
    "- Reject duplicate track numbers\n\n"

    "**Output Format Requirements**\n"
    "{\n"
    "  \"old_to_new\": {\n"
    "    \"[原始文件名1]\": \"01 正式曲名1.扩展名\",\n"
    "    \"[原始文件名2]\": \"02 正式曲名2.扩展名\"\n"
    "  }\n"
    "}\n"
    "- Track numbers: 2-digit zero-padded\n"
    "- Special cases:\n"
    "  - Live: Append \"(Live)\"\n"
    "  - Remix: Append \"(Remix)\"\n"
    "  - Instrumental: Use \"Instrumental\" artist"
)


def _build_user_prompt(album_name: str, files: List[str]) -> str:
    """Build the per-album user message: album folder name plus file list."""
    # NOTE(review): reminder 1 below ("use input order as track sequence")
    # contradicts the system prompt, which asks for numbering by the OFFICIAL
    # track order. Decide which behaviour is intended and drop the other.
    return (
        # The system prompt tells the model to search "using album name", so
        # the folder name has to be part of the request.
        f"Album folder name: {album_name}\n"
        "Clean and align these files while PRESERVING THEIR INPUT ORDER:\n"
        # ensure_ascii=False keeps non-ASCII (e.g. Chinese) filenames readable
        # in the prompt instead of \uXXXX escapes.
        f"{json.dumps(files, ensure_ascii=False)}\n\n"
        "Critical Reminders:\n"
        "1. NEVER reorder files - use input order as track sequence\n"
        "2. Prioritize official names from verified sources\n"
        "3. Strict Traditional Chinese for Chinese-language content\n"
        "4. Return COMPLETE JSON - no truncation"
    )


def _plan_copies(files: List[str], mapping: Dict[str, str]) -> list:
    """Turn the model's mapping into safe ``(old_name, new_name)`` pairs.

    Files are skipped, with a message, when the model returned no mapping
    for them, when the sanitized target name is empty, or when another file
    in the same album already claimed the same target name. A target whose
    extension differs from the source gets the source extension appended.
    """
    plan = []
    claimed = set()  # case-folded target names already taken in this album
    for old_name in files:
        proposed = mapping.get(old_name)
        if proposed is None:
            print(f"Skipped (no mapping returned): {old_name}")
            continue

        new_name = clean_filename(proposed)
        if not new_name.strip():
            # An empty name would make the copy destination the directory.
            print(f"Skipped (empty target name): {old_name}")
            continue

        # The reply is untrusted: enforce the original extension by
        # appending it, which never discards part of the proposed title.
        old_ext = os.path.splitext(old_name)[1]
        if os.path.splitext(new_name)[1].lower() != old_ext.lower():
            new_name = f"{new_name}{old_ext}"

        # Compare case-insensitively so two targets cannot collide on a
        # case-insensitive filesystem and silently overwrite each other.
        key = new_name.casefold()
        if key in claimed:
            print(f"Skipped (duplicate target name {new_name}): {old_name}")
            continue
        claimed.add(key)
        plan.append((old_name, new_name))
    return plan


def process_album(album_dir: str):
    """Copy one album's audio files into ``new_albums`` under model-chosen names.

    The audio files in *album_dir* are listed in natural sort order and sent
    to the Perplexity chat-completions endpoint, which returns an
    old-name -> new-name mapping. Each mapped file is copied (never moved) to
    ``<parent of album_dir>/new_albums/<album folder name>/`` and the copied
    names are then passed through ``convert_traditional_chinese``.

    Args:
        album_dir: Path of the album folder to process.

    Any exception is caught and reported with ``print`` so that one failing
    album does not stop a batch run; the function always returns ``None``.
    """
    try:
        # Normalise first: a trailing separator would otherwise make
        # basename() empty and dirname() return the album folder itself.
        source_dir = os.path.normpath(album_dir)
        album_name = os.path.basename(source_dir)

        validator = AudioFileValidator()
        files = natsorted([
            f for f in os.listdir(source_dir)
            if validator.is_audio_file(f)
        ])
        if not files:
            # Nothing to rename, so do not spend an API call.
            print(f"No audio files found in {album_dir}, skipping")
            return

        # Created inside the try block so a missing or invalid API key is
        # reported for this album instead of aborting the caller's loop.
        client = OpenAI(
            api_key=os.getenv("PERPLEXITY_API_KEY"),
            base_url="https://api.perplexity.ai"
        )

        # Get direct filename mapping from LLM
        response = client.chat.completions.create(
            model="sonar-pro",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": _build_user_prompt(album_name, files),
                },
            ],
            temperature=0.0,
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "schema": RenameSchema.model_json_schema()
                }
            }
        )

        rename_map = FileRenameMap.model_validate_json(
            response.choices[0].message.content
        )

        plan = _plan_copies(files, rename_map.old_to_new)
        if not plan:
            print(f"No usable renames returned for {album_dir}")
            return

        # Output goes to 'new_albums' next to the album folder.
        new_dir = os.path.join(
            os.path.dirname(source_dir), "new_albums", album_name
        )
        os.makedirs(new_dir, exist_ok=True)

        # Copy files with new names in original order
        for old_name, new_name in plan:
            shutil.copy2(
                os.path.join(source_dir, old_name),
                os.path.join(new_dir, new_name)
            )
            print(f"Copied: {old_name} → {new_name}")

        # Convert to Traditional Chinese if needed
        convert_traditional_chinese(new_dir)

    except Exception as e:
        # Deliberate per-album boundary: report and let the batch continue.
        print(f"Error processing {album_dir}: {str(e)}")

def convert_traditional_chinese(path: str):
    """Rename entries of *path* from Simplified to Traditional Chinese.

    The base name (without extension) of every directory entry is run
    through OpenCC's ``s2t`` conversion; entries whose name changes are
    renamed in place, keeping the extension and sanitizing the new base with
    ``clean_filename``.

    An entry is left untouched, with a message, when the converted name is
    already taken by another entry. ``os.rename`` would otherwise replace
    that file on POSIX or raise on Windows and abort the remaining entries.

    Args:
        path: Directory whose entries are renamed.
    """
    cc = OpenCC('s2t')
    for filename in os.listdir(path):
        base, ext = os.path.splitext(filename)
        new_base = cc.convert(base)
        if new_base == base:
            continue

        target_name = f"{clean_filename(new_base)}{ext}"
        if target_name == filename:
            # Sanitizing undid the only difference; nothing to rename.
            continue

        target = os.path.join(path, target_name)
        if os.path.exists(target):
            print(f"Skipped conversion (target exists): {filename} → {target_name}")
            continue

        os.rename(os.path.join(path, filename), target)

def main(music_root: str = "F://Albums"):
    """Run ``process_album`` on every album folder directly under *music_root*.

    Args:
        music_root: Directory that contains one sub-folder per album.
            Defaults to the original hard-coded library location.

    Prints a message and returns if *music_root* is not an existing
    directory. Plain files are ignored, and so is the ``new_albums`` folder:
    ``process_album`` writes its output there (next to the album folders), so
    without this check a second run would treat the output as an album.
    """
    music_root = os.path.abspath(music_root)
    # isdir rather than exists: a plain file here would make listdir raise.
    if not os.path.isdir(music_root):
        print(f"Path not found: {music_root}")
        return

    # sorted() makes the processing order deterministic across runs.
    for album in sorted(os.listdir(music_root)):
        if album == "new_albums":
            continue
        album_path = os.path.join(music_root, album)
        if os.path.isdir(album_path):
            print(f"\nProcessing: {album}")
            process_album(album_path)


if __name__ == "__main__":
    main()



Processing: [Bruno Mars] 24K Magic
Copied: _Bruno Mars-24K Magic.dsf → 01 24K Magic.dsf
Copied: _Bruno Mars-Calling All My Lovelies.dsf → 02 Calling All My Lovelies.dsf
Copied: _Bruno Mars-Chunky.dsf → 03 Chunky.dsf
Copied: _Bruno Mars-Finesse.dsf → 04 Finesse.dsf
Copied: _Bruno Mars-Perm.dsf → 05 Perm.dsf
Copied: _Bruno Mars-Straight Up & Down.dsf → 06 Straight Up & Down.dsf
Copied: _Bruno Mars-That's What I Like.dsf → 07 That's What I Like.dsf
Copied: _Bruno Mars-Too Good To Say Goodbye.dsf → 08 Too Good To Say Goodbye.dsf
Copied: _Bruno Mars-Versace On The Floor.dsf → 09 Versace On The Floor.dsf

Processing: [Taylor Swift] Midnights
Copied: 01 - Taylor Swift - Lavender Haze.flac → 01 Lavender Haze.flac
Copied: 02 - Taylor Swift - Maroon.flac → 02 Maroon.flac
Copied: 03 - Taylor Swift - Anti-Hero.flac → 03 Anti-Hero.flac
Copied: 04 - Taylor Swift - Snow On The Beach.flac → 04 Snow On The Beach.flac
Copied: 05 - Taylor Swift - You're On Your Own, Kid.flac → 05 You're On Your Own, Kid