# Pop2Piano - Colab Workflow (GitHub Edition)

This notebook runs Pop2Piano using the code from your GitHub repository.

## Key Steps:
1. **Install Dependencies:** With fixes for environment issues.
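2. **Mock Essentia:** Register a stand-in for the optional `essentia` package so that importing the Pop2Piano classes from `transformers` does not fail when it is not installed.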
3. **Clone Repository:** Fetch your fixed code from GitHub.
4. **Download Data:** Run the download script.
5. **Inference:** Generate Piano Covers.

In [None]:
# @title 0. Mount Google Drive 💾
# Mounts Google Drive and moves the kernel into a workspace folder on it, so
# everything later cells write relative to the current directory lands on Drive.
from google.colab import drive
import os

drive.mount('/content/drive')

# Persistent working directory inside the mounted Drive.
GDRIVE_WORKING_DIR = "/content/drive/MyDrive/Pop2Piano_Workspace"

# exist_ok=True makes re-running the cell a no-op instead of relying on a
# separate "does it exist?" check before creating the folder.
os.makedirs(GDRIVE_WORKING_DIR, exist_ok=True)

# Change to the persistent directory (plain Python, no shell interpolation).
os.chdir(GDRIVE_WORKING_DIR)

print(f"Switched to persistent directory: {os.getcwd()}")


In [None]:
# @title 1. Install Dependencies 📦
# Installs the Python packages into the kernel's interpreter and the system
# tools (FluidSynth, a General MIDI soundfont, ffmpeg) through apt.
import os
import sys
import subprocess

print("Installing dependencies... ⏳")

# Install Python packages. check_call raises CalledProcessError if pip fails,
# so the cell stops instead of reporting success.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "git+https://github.com/huggingface/transformers.git"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "librosa", "pretty_midi", "midi2audio", "scipy", "yt-dlp", "imageio-ffmpeg", "omegaconf", "joblib", "tqdm"])

# Install System packages.
# Refreshing the package index is best-effort: a failure is reported but the
# install below is still attempted against the index that is already present.
update_status = subprocess.call(["sudo", "apt-get", "update", "-q"])
if update_status != 0:
    print(f"⚠️ apt-get update exited with status {update_status}; trying to install anyway.")

# The install itself must succeed, so a non-zero exit status raises here
# rather than being silently dropped.
subprocess.check_call(["sudo", "apt-get", "install", "-y", "-q", "fluidsynth", "fluid-soundfont-gm", "ffmpeg"])

print("Done! ✅")

In [None]:
# @title 2. Apply Magic Fixes (Essentia Mocking) 🪄
# Registers stand-in modules for `essentia` and `essentia.standard` in
# sys.modules so that `import essentia` succeeds without the real package.
import sys
from unittest.mock import MagicMock
from importlib.machinery import ModuleSpec

# Only mock when there is no real installation: a genuine essentia package
# must not be replaced by a stub. A mock left behind by a previous run of this
# cell does not count as "real" and is simply rebuilt.
try:
    import essentia as _existing_essentia
    _essentia_is_real = not isinstance(_existing_essentia, MagicMock)
except ImportError:
    _essentia_is_real = False

if _essentia_is_real:
    print("Real essentia installation found; skipping the mock.")
else:
    mock_essentia = MagicMock()
    # importlib.util.find_spec raises ValueError for a sys.modules entry whose
    # __spec__ is unset, so both stand-ins get an explicit spec.
    mock_essentia.__spec__ = ModuleSpec(name='essentia', loader=None)

    # Register the *same* object that `essentia.standard` attribute access
    # yields, so `import essentia.standard` and `essentia.standard` agree.
    mock_standard = mock_essentia.standard
    mock_standard.__spec__ = ModuleSpec(name='essentia.standard', loader=None)

    sys.modules["essentia"] = mock_essentia
    sys.modules["essentia.standard"] = mock_standard

    print("Essentia mocked successfully! 🧙‍♂️")

In [None]:
# @title 3. Clone Repository 🐙
# Clones the project repository on first run, pulls on later runs, and leaves
# the kernel's working directory at the repository root.
import os
import subprocess

repo_url = "https://github.com/kareemkamal10/pop2piano.git"
repo_name = "pop2piano"


def _run_git(arguments, cwd=None):
    """Run `git <arguments>`, echo its output into the cell, return the exit status."""
    completed = subprocess.run(["git", *arguments], cwd=cwd, capture_output=True, text=True)
    combined_output = (completed.stdout + completed.stderr).strip()
    if combined_output:
        print(combined_output)
    return completed.returncode


# After a previous run the kernel is already *inside* the checkout. Looking for
# a relative "pop2piano" folder from there would miss it and clone a second,
# nested copy, so detect that case and reuse the current directory.
current_dir = os.getcwd()
already_inside = (
    os.path.basename(current_dir) == repo_name
    and os.path.isdir(os.path.join(current_dir, ".git"))
)
repo_dir = current_dir if already_inside else os.path.join(current_dir, repo_name)

if os.path.isdir(os.path.join(repo_dir, ".git")):
    print("Repository already cloned. Pulling latest changes...")
    # Pull is best-effort: the existing checkout is still usable if it fails.
    if _run_git(["pull"], cwd=repo_dir) != 0:
        print("⚠️ git pull failed; continuing with the existing checkout.")
else:
    print(f"Cloning {repo_url}...")
    # Without a checkout nothing downstream can work, so a failed clone stops the cell.
    if _run_git(["clone", repo_url, repo_dir]) != 0:
        raise RuntimeError(f"git clone of {repo_url} failed; see the output above.")

os.chdir(repo_dir)

print(f"Current working directory: {os.getcwd()}")

In [None]:
# @title 4. Run Download Script (CLI / Background Mode) 📥
# Runs download/download.py as a child process with all of its output sent to
# download_log.txt instead of the cell, which keeps the browser from being
# flooded. The cell waits for the script to exit and then reports whether it
# succeeded, based on the script's exit status.
import subprocess
import sys

print("🚀 Starting Background Download Task...")
print("📄 Logs are being written to: download_log.txt")
print("⚠️ This cell will appear to 'finish' or hang silently - THAT IS GOOD.")
print("👀 Run the NEXT cell to check progress.")

# stdout and stderr both go to the log file (the equivalent of
# `> download_log.txt 2>&1`). `-u` disables Python's output buffering in the
# child so the log file is updated as the script prints.
with open("download_log.txt", "w") as log_file:
    exit_status = subprocess.call(
        [
            sys.executable, "-u", "download/download.py",
            "train_dataset.csv", "output_dir/",
            "--max_size_gb", "15.0",
        ],
        stdout=log_file,
        stderr=subprocess.STDOUT,
    )

# Report the real outcome instead of announcing success unconditionally.
if exit_status == 0:
    print("\n✅ Task Completed!")
else:
    print(f"\n❌ Task failed with exit code {exit_status}. Check download_log.txt for details.")

In [None]:
# @title 4.1 Monitor Progress (Live Log View) 📺
# Prints the final 20 lines of download_log.txt, the notebook counterpart of
# tailing a log file in a Linux shell. Re-run the cell to refresh the view.

# `!cmd` is IPython shorthand for get_ipython().system("cmd"); it is written
# out in full here.
get_ipython().system("tail -n 20 download_log.txt")

# Or to follow it live for a few seconds (uncomment below):
# !timeout 10 tail -f download_log.txt

In [None]:
# @title 5. Run Inference 🎹 (Fixed & Robust)
# Loads the pretrained Pop2Piano checkpoint and its preprocessing objects once,
# as module-level names used by the rest of this cell.
import torch
import librosa
import numpy as np
from transformers import Pop2PianoForConditionalGeneration, AutoFeatureExtractor, AutoTokenizer, Pop2PianoProcessor
from google.colab import files
from midi2audio import FluidSynth
from IPython.display import Audio, display
import traceback

# Use the GPU when the runtime has one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Working on: {device} 🚀")

model_id = "sweetcocoa/pop2piano"
try:
    model = Pop2PianoForConditionalGeneration.from_pretrained(model_id).to(device)
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    processor = Pop2PianoProcessor.from_pretrained(model_id)
    print("Model loaded! ✅")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    print("Try restarting the runtime if this persists.")
    # Stop the cell here: continuing would either hit a NameError later or,
    # worse, silently reuse objects left over from an earlier run.
    raise

def process_audio(audio_path, composer="composer1"):
    """Generate piano tokens for one audio file.

    Args:
        audio_path: Path of the audio file to load with librosa.
        composer: Style key forwarded to ``model.generate``. It has to be one
            of the composers listed in the checkpoint's generation config;
            the default is ``"composer1"``.

    Returns:
        A ``(model_output, inputs)`` tuple: the value returned by
        ``model.generate`` and the feature-extractor output (moved to
        ``device``) that produced it. The caller needs both for decoding.
    """
    print(f"Processing: {audio_path}...")
    # Load audio, resampled to 44.1 kHz.
    audio, sr = librosa.load(audio_path, sr=44100)

    # Tempo detection is informational only: the value is printed but is not
    # passed to the feature extractor or the model below.
    try:
        tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
        # Newer librosa versions may return an array instead of a scalar;
        # take the first element so formatting works in either case.
        tempo = float(np.atleast_1d(tempo)[0])
        print(f"Detected Tempo: {tempo:.2f} BPM")
    except Exception as e:
        print(f"⚠️ Beat tracking failed ({e}), using default tempo 120.")
        tempo = 120.0

    # Create input features and place them on the same device as the model.
    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        model_output = model.generate(
            input_features=inputs["input_features"],
            composer=composer,
        )
    return model_output, inputs

# Ask for one or more audio files, then turn each into a piano MIDI file plus
# a synthesized WAV preview, and offer both for download.
print("\n>>> Upload your Song (MP3/WAV):")
uploaded = files.upload()

if uploaded:
    for audio_path in uploaded.keys():
        # Each file is handled independently so one failure does not stop the rest.
        try:
            model_output, inputs = process_audio(audio_path)

            # Decode to MIDI.
            # batch_decode returns a mapping; its "pretty_midi_objects" entry
            # holds one PrettyMIDI object per input, and a single file was
            # processed, so the first entry is taken.
            decoded = processor.batch_decode(token_ids=model_output, feature_extractor_output=inputs)
            midi = decoded["pretty_midi_objects"][0]

            midi_filename = f"{audio_path}_piano.mid"
            wav_filename = f"{audio_path}_piano.wav"

            # Save MIDI through the PrettyMIDI object itself.
            midi.write(midi_filename)

            # Convert MIDI to Audio for preview
            print("Synthesizing audio preview... 🎹")
            FluidSynth(sound_font="/usr/share/sounds/sf2/FluidR3_GM.sf2").midi_to_audio(midi_filename, wav_filename)

            print("\n--- 🎶 Result ---")
            display(Audio(wav_filename))
            files.download(midi_filename)
            files.download(wav_filename)
            print(f"✅ Finished: {midi_filename}")

        except Exception:
            # Show the full traceback for this file, then move on to the next one.
            print(f"❌ Error processing {audio_path}:")
            traceback.print_exc()


In [None]:
# @title 6. Backup & Optimization (Save Model to Drive) 💾
# Since Training code is missing in this repo, we will at least save the Pre-trained Model
# and your Downloaded Dataset under the current working directory so they do
# not have to be downloaded again.

import shutil
import os
from transformers import Pop2PianoForConditionalGeneration, AutoProcessor

print("💾 Starting Backup Process...")

# 1. Save the pretrained model and processor (so they can be loaded offline later)
save_path = os.path.join(os.getcwd(), "saved_models", "pop2piano_cached")

# A marker file written only after both saves finish tells a complete backup
# apart from a folder left behind by an interrupted run. A backup made before
# this marker existed is saved once more the next time the cell runs.
backup_marker = os.path.join(save_path, ".backup_complete")

if not os.path.exists(backup_marker):
    print(f"📥 Downloading and saving model to {save_path}...")
    # Dedicated names keep this cell from rebinding the `model` / `processor`
    # globals that the inference cell uses.
    backup_model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano")
    backup_processor = AutoProcessor.from_pretrained("sweetcocoa/pop2piano")

    backup_model.save_pretrained(save_path)
    backup_processor.save_pretrained(save_path)

    with open(backup_marker, "w") as marker_file:
        marker_file.write("complete\n")
    print("✅ Model saved to Google Drive!")
else:
    print("✅ Model already exists on Google Drive.")

# 2. Zip the Dataset (Optional - Good for storage)
# output_dir contains thousands of small files which is slow on Drive. Zipping is better.
if os.path.exists("output_dir"):
    print("📦 Zipping downloaded dataset (this may take time)...")
    shutil.make_archive("pop2piano_dataset", 'zip', "output_dir")
    print("✅ Dataset zipped: pop2piano_dataset.zip")
else:
    print("⚠️ No output_dir found to zip.")
