In [None]:
# Audio Stem Separation Test

This notebook tests both Spleeter and Demucs for audio stem separation, processing the full length of a song with each tool and comparing their outputs.


In [None]:
# Install required packages
!pip install librosa numpy spleeter demucs diffq soundfile


In [None]:
import os
import time
import torch
import librosa
import numpy as np
from pathlib import Path
from IPython.display import Audio
from spleeter.separator import Separator
from demucs.pretrained import get_model
from demucs.apply import apply_model
import warnings
warnings.filterwarnings('ignore')

# Report whether PyTorch can see a CUDA device and, if so, its name and total memory
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name()
    print(f"GPU Device: {gpu_name}")
    # total_memory is in bytes; dividing by 1e9 reports (decimal) gigabytes
    gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {gpu_total_gb:.2f} GB")


In [None]:
# Stem Separation Test: Spleeter vs Demucs

This notebook will:
1. Install both Spleeter and Demucs
2. Set up the required dependencies (ffmpeg)
3. Process an audio file with both models
4. Compare the results and timing

First, let's install the required packages:


In [None]:
# Stem Separation Test: Spleeter vs Demucs

This notebook will:
1. Install both Spleeter and Demucs
2. Set up the required dependencies (ffmpeg)
3. Process an audio file with both models
4. Compare the results and timing

First, let's install the required packages:


In [None]:
# Stem Separation Test: Spleeter vs Demucs

This notebook will:
1. Install both Spleeter and Demucs
2. Set up the required dependencies (ffmpeg)
3. Process an audio file with both models
4. Compare the results and timing

First, let's install the required packages:


In [None]:
!apt-get update && apt-get install -y ffmpeg
!pip install spleeter demucs


In [None]:
Now let's set up our Python imports and helper functions:


In [None]:
import os
import time
from pathlib import Path
import shutil
import torch

def time_operation(func):
    """Decorator that times every call to ``func``.

    The decorated callable prints ``"<name> took <seconds> seconds"`` and
    returns a ``(result, duration)`` tuple instead of the bare result, so
    callers must unpack two values.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature as ``func`` that returns
        ``(func(*args, **kwargs), elapsed_seconds)``.
    """
    import functools  # local import: functools is not in this cell's import list

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump if the system clock changes.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        duration = time.perf_counter() - start
        print(f"{func.__name__} took {duration:.2f} seconds")
        return result, duration
    return wrapper

@time_operation
def run_spleeter(input_file: str, output_dir: str):
    """Run Spleeter's 4-stem separation on ``input_file`` through its CLI.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory passed to ``spleeter separate -o``.

    Returns:
        ``output_dir`` unchanged. Because of ``@time_operation`` the caller
        receives ``(output_dir, duration_seconds)``.

    Raises:
        FileNotFoundError: If the ``spleeter`` executable cannot be found.
        subprocess.CalledProcessError: If ``spleeter`` exits with a non-zero
            status, so a failed run is not silently timed as a success.
    """
    import subprocess  # local import: subprocess is not in this cell's import list

    # An argument list (no shell) passes paths containing spaces or quotes
    # through untouched; the previous shell string left output_dir unquoted.
    subprocess.run(
        ['spleeter', 'separate', '-p', 'spleeter:4stems', '-o', str(output_dir), str(input_file)],
        check=True,
    )
    return output_dir

@time_operation
def run_demucs(input_file: str, output_dir: str):
    """Run Demucs vocals/accompaniment separation on ``input_file`` through its CLI.

    The command is invoked with ``--mp3 --two-stems=vocals``, the same flags
    the notebook used before.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory passed to ``demucs -o``.

    Returns:
        ``output_dir`` unchanged. Because of ``@time_operation`` the caller
        receives ``(output_dir, duration_seconds)``.

    Raises:
        FileNotFoundError: If the ``demucs`` executable cannot be found.
        subprocess.CalledProcessError: If ``demucs`` exits with a non-zero
            status, so a failed run is not silently timed as a success.
    """
    import subprocess  # local import: subprocess is not in this cell's import list

    # An argument list (no shell) passes paths containing spaces or quotes
    # through untouched; the previous shell string left output_dir unquoted.
    subprocess.run(
        ['demucs', '--mp3', '--two-stems=vocals', str(input_file), '-o', str(output_dir)],
        check=True,
    )
    return output_dir

# Show whether a CUDA device is visible and, if so, its name and total memory
gpu_present = torch.cuda.is_available()
print("GPU Available:", gpu_present)
if gpu_present:
    print("GPU Device:", torch.cuda.get_device_name(0))
    # total_memory is in bytes; dividing by 1e9 reports (decimal) gigabytes
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print("GPU Memory:", gpu_memory_gb, "GB")


In [None]:
Now we need to upload our test audio file. To do this:

1. Click the folder icon in the left sidebar
2. Navigate to where you want to upload (e.g., create a new `input` folder)
3. Upload your audio file using the upload button (up arrow icon)

Once uploaded, we'll process it with both models:


In [None]:
# Set up paths
WORKSPACE_DIR = Path('/workspace')
INPUT_DIR = WORKSPACE_DIR / 'input'
OUTPUT_DIR = WORKSPACE_DIR / 'output'

# Create directories. parents=True also creates WORKSPACE_DIR when it is
# missing; a plain mkdir() would raise FileNotFoundError in that case.
for directory in (INPUT_DIR, OUTPUT_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# List available audio files (WAV first, then MP3; sorted so the listing is
# stable between runs)
print("Available audio files in input directory:")
for pattern in ('*.wav', '*.mp3'):
    for file in sorted(INPUT_DIR.glob(pattern)):
        print(f"- {file.name}")


In [None]:
# Set your audio file name here
AUDIO_FILE = "your_audio_file.wav"  # Change this to your uploaded file name
input_path = INPUT_DIR / AUDIO_FILE

# Stop early with a clear message if the file is missing or AUDIO_FILE was not updated
if not input_path.exists():
    raise FileNotFoundError(f"Audio file not found: {input_path}")

# Create output directories for each model
spleeter_output = OUTPUT_DIR / 'spleeter'
demucs_output = OUTPUT_DIR / 'demucs'

# Clean up any previous output so stale stems are never listed below
for previous_output in (spleeter_output, demucs_output):
    if previous_output.exists():
        shutil.rmtree(previous_output)

# Run both models; each helper returns (output_dir, seconds) via @time_operation
print("\nRunning Spleeter...")
spleeter_result, spleeter_time = run_spleeter(str(input_path), str(spleeter_output))

print("\nRunning Demucs...")
demucs_result, demucs_time = run_demucs(str(input_path), str(demucs_output))

# Print summary
print("\nProcessing Summary:")
print(f"Spleeter processing time: {spleeter_time:.2f} seconds")
print(f"Demucs processing time: {demucs_time:.2f} seconds")

# List output files. run_demucs passes --mp3, so matching only '*.wav' would
# always leave the Demucs listing empty; accept both extensions instead.
OUTPUT_SUFFIXES = {'.wav', '.mp3'}
for label, result_dir in (("Spleeter", spleeter_result), ("Demucs", demucs_result)):
    result_root = Path(result_dir)
    print(f"\n{label} output files:")
    for file in sorted(result_root.rglob('*')):
        if file.suffix.lower() in OUTPUT_SUFFIXES:
            print(f"- {file.relative_to(result_root)}")


In [None]:
!apt-get update && apt-get install -y ffmpeg
!pip install spleeter demucs


In [None]:
Now let's set up our Python imports and helper functions:


In [None]:
import os
import time
from pathlib import Path
import shutil
import torch

def time_operation(func):
    """Decorator that times every call to ``func``.

    The decorated callable prints ``"<name> took <seconds> seconds"`` and
    returns a ``(result, duration)`` tuple instead of the bare result, so
    callers must unpack two values.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature as ``func`` that returns
        ``(func(*args, **kwargs), elapsed_seconds)``.
    """
    import functools  # local import: functools is not in this cell's import list

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump if the system clock changes.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        duration = time.perf_counter() - start
        print(f"{func.__name__} took {duration:.2f} seconds")
        return result, duration
    return wrapper

@time_operation
def run_spleeter(input_file: str, output_dir: str):
    """Run Spleeter's 4-stem separation on ``input_file`` through its CLI.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory passed to ``spleeter separate -o``.

    Returns:
        ``output_dir`` unchanged. Because of ``@time_operation`` the caller
        receives ``(output_dir, duration_seconds)``.

    Raises:
        FileNotFoundError: If the ``spleeter`` executable cannot be found.
        subprocess.CalledProcessError: If ``spleeter`` exits with a non-zero
            status, so a failed run is not silently timed as a success.
    """
    import subprocess  # local import: subprocess is not in this cell's import list

    # An argument list (no shell) passes paths containing spaces or quotes
    # through untouched; the previous shell string left output_dir unquoted.
    subprocess.run(
        ['spleeter', 'separate', '-p', 'spleeter:4stems', '-o', str(output_dir), str(input_file)],
        check=True,
    )
    return output_dir

@time_operation
def run_demucs(input_file: str, output_dir: str):
    """Run Demucs vocals/accompaniment separation on ``input_file`` through its CLI.

    The command is invoked with ``--mp3 --two-stems=vocals``, the same flags
    the notebook used before.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory passed to ``demucs -o``.

    Returns:
        ``output_dir`` unchanged. Because of ``@time_operation`` the caller
        receives ``(output_dir, duration_seconds)``.

    Raises:
        FileNotFoundError: If the ``demucs`` executable cannot be found.
        subprocess.CalledProcessError: If ``demucs`` exits with a non-zero
            status, so a failed run is not silently timed as a success.
    """
    import subprocess  # local import: subprocess is not in this cell's import list

    # An argument list (no shell) passes paths containing spaces or quotes
    # through untouched; the previous shell string left output_dir unquoted.
    subprocess.run(
        ['demucs', '--mp3', '--two-stems=vocals', str(input_file), '-o', str(output_dir)],
        check=True,
    )
    return output_dir

# Show whether a CUDA device is visible and, if so, its name and total memory
gpu_present = torch.cuda.is_available()
print("GPU Available:", gpu_present)
if gpu_present:
    print("GPU Device:", torch.cuda.get_device_name(0))
    # total_memory is in bytes; dividing by 1e9 reports (decimal) gigabytes
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print("GPU Memory:", gpu_memory_gb, "GB")


In [None]:
Now we need to upload our test audio file. To do this:

1. Click the folder icon in the left sidebar
2. Navigate to where you want to upload (e.g., create a new `input` folder)
3. Upload your audio file using the upload button (up arrow icon)

Once uploaded, we'll process it with both models:


In [None]:
# Set up paths
WORKSPACE_DIR = Path('/workspace')
INPUT_DIR = WORKSPACE_DIR / 'input'
OUTPUT_DIR = WORKSPACE_DIR / 'output'

# Create directories. parents=True also creates WORKSPACE_DIR when it is
# missing; a plain mkdir() would raise FileNotFoundError in that case.
for directory in (INPUT_DIR, OUTPUT_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# List available audio files (WAV first, then MP3; sorted so the listing is
# stable between runs)
print("Available audio files in input directory:")
for pattern in ('*.wav', '*.mp3'):
    for file in sorted(INPUT_DIR.glob(pattern)):
        print(f"- {file.name}")


In [None]:
# Set your audio file name here
AUDIO_FILE = "your_audio_file.wav"  # Change this to your uploaded file name
input_path = INPUT_DIR / AUDIO_FILE

# Stop early with a clear message if the file is missing or AUDIO_FILE was not updated
if not input_path.exists():
    raise FileNotFoundError(f"Audio file not found: {input_path}")

# Create output directories for each model
spleeter_output = OUTPUT_DIR / 'spleeter'
demucs_output = OUTPUT_DIR / 'demucs'

# Clean up any previous output so stale stems are never listed below
for previous_output in (spleeter_output, demucs_output):
    if previous_output.exists():
        shutil.rmtree(previous_output)

# Run both models; each helper returns (output_dir, seconds) via @time_operation
print("\nRunning Spleeter...")
spleeter_result, spleeter_time = run_spleeter(str(input_path), str(spleeter_output))

print("\nRunning Demucs...")
demucs_result, demucs_time = run_demucs(str(input_path), str(demucs_output))

# Print summary
print("\nProcessing Summary:")
print(f"Spleeter processing time: {spleeter_time:.2f} seconds")
print(f"Demucs processing time: {demucs_time:.2f} seconds")

# List output files. run_demucs passes --mp3, so matching only '*.wav' would
# always leave the Demucs listing empty; accept both extensions instead.
OUTPUT_SUFFIXES = {'.wav', '.mp3'}
for label, result_dir in (("Spleeter", spleeter_result), ("Demucs", demucs_result)):
    result_root = Path(result_dir)
    print(f"\n{label} output files:")
    for file in sorted(result_root.rglob('*')):
        if file.suffix.lower() in OUTPUT_SUFFIXES:
            print(f"- {file.relative_to(result_root)}")


In [None]:
!apt-get update && apt-get install -y ffmpeg
!pip install spleeter demucs


In [None]:
Now let's set up our Python imports and helper functions:


In [None]:
import os
import time
from pathlib import Path
import shutil
import torch

def time_operation(func):
    """Decorator that times every call to ``func``.

    The decorated callable prints ``"<name> took <seconds> seconds"`` and
    returns a ``(result, duration)`` tuple instead of the bare result, so
    callers must unpack two values.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature as ``func`` that returns
        ``(func(*args, **kwargs), elapsed_seconds)``.
    """
    import functools  # local import: functools is not in this cell's import list

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump if the system clock changes.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        duration = time.perf_counter() - start
        print(f"{func.__name__} took {duration:.2f} seconds")
        return result, duration
    return wrapper

@time_operation
def run_spleeter(input_file: str, output_dir: str):
    """Run Spleeter's 4-stem separation on ``input_file`` through its CLI.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory passed to ``spleeter separate -o``.

    Returns:
        ``output_dir`` unchanged. Because of ``@time_operation`` the caller
        receives ``(output_dir, duration_seconds)``.

    Raises:
        FileNotFoundError: If the ``spleeter`` executable cannot be found.
        subprocess.CalledProcessError: If ``spleeter`` exits with a non-zero
            status, so a failed run is not silently timed as a success.
    """
    import subprocess  # local import: subprocess is not in this cell's import list

    # An argument list (no shell) passes paths containing spaces or quotes
    # through untouched; the previous shell string left output_dir unquoted.
    subprocess.run(
        ['spleeter', 'separate', '-p', 'spleeter:4stems', '-o', str(output_dir), str(input_file)],
        check=True,
    )
    return output_dir

@time_operation
def run_demucs(input_file: str, output_dir: str):
    """Run Demucs vocals/accompaniment separation on ``input_file`` through its CLI.

    The command is invoked with ``--mp3 --two-stems=vocals``, the same flags
    the notebook used before.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory passed to ``demucs -o``.

    Returns:
        ``output_dir`` unchanged. Because of ``@time_operation`` the caller
        receives ``(output_dir, duration_seconds)``.

    Raises:
        FileNotFoundError: If the ``demucs`` executable cannot be found.
        subprocess.CalledProcessError: If ``demucs`` exits with a non-zero
            status, so a failed run is not silently timed as a success.
    """
    import subprocess  # local import: subprocess is not in this cell's import list

    # An argument list (no shell) passes paths containing spaces or quotes
    # through untouched; the previous shell string left output_dir unquoted.
    subprocess.run(
        ['demucs', '--mp3', '--two-stems=vocals', str(input_file), '-o', str(output_dir)],
        check=True,
    )
    return output_dir

# Show whether a CUDA device is visible and, if so, its name and total memory
gpu_present = torch.cuda.is_available()
print("GPU Available:", gpu_present)
if gpu_present:
    print("GPU Device:", torch.cuda.get_device_name(0))
    # total_memory is in bytes; dividing by 1e9 reports (decimal) gigabytes
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print("GPU Memory:", gpu_memory_gb, "GB")


In [None]:
Now we need to upload our test audio file. To do this:

1. Click the folder icon in the left sidebar
2. Navigate to where you want to upload (e.g., create a new `input` folder)
3. Upload your audio file using the upload button (up arrow icon)

Once uploaded, we'll process it with both models:


In [None]:
# Set up paths
WORKSPACE_DIR = Path('/workspace')
INPUT_DIR = WORKSPACE_DIR / 'input'
OUTPUT_DIR = WORKSPACE_DIR / 'output'

# Create directories. parents=True also creates WORKSPACE_DIR when it is
# missing; a plain mkdir() would raise FileNotFoundError in that case.
for directory in (INPUT_DIR, OUTPUT_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# List available audio files (WAV first, then MP3; sorted so the listing is
# stable between runs)
print("Available audio files in input directory:")
for pattern in ('*.wav', '*.mp3'):
    for file in sorted(INPUT_DIR.glob(pattern)):
        print(f"- {file.name}")


In [None]:
# Set your audio file name here
AUDIO_FILE = "your_audio_file.wav"  # Change this to your uploaded file name
input_path = INPUT_DIR / AUDIO_FILE

# Stop early with a clear message if the file is missing or AUDIO_FILE was not updated
if not input_path.exists():
    raise FileNotFoundError(f"Audio file not found: {input_path}")

# Create output directories for each model
spleeter_output = OUTPUT_DIR / 'spleeter'
demucs_output = OUTPUT_DIR / 'demucs'

# Clean up any previous output so stale stems are never listed below
for previous_output in (spleeter_output, demucs_output):
    if previous_output.exists():
        shutil.rmtree(previous_output)

# Run both models; each helper returns (output_dir, seconds) via @time_operation
print("\nRunning Spleeter...")
spleeter_result, spleeter_time = run_spleeter(str(input_path), str(spleeter_output))

print("\nRunning Demucs...")
demucs_result, demucs_time = run_demucs(str(input_path), str(demucs_output))

# Print summary
print("\nProcessing Summary:")
print(f"Spleeter processing time: {spleeter_time:.2f} seconds")
print(f"Demucs processing time: {demucs_time:.2f} seconds")

# List output files. run_demucs passes --mp3, so matching only '*.wav' would
# always leave the Demucs listing empty; accept both extensions instead.
OUTPUT_SUFFIXES = {'.wav', '.mp3'}
for label, result_dir in (("Spleeter", spleeter_result), ("Demucs", demucs_result)):
    result_root = Path(result_dir)
    print(f"\n{label} output files:")
    for file in sorted(result_root.rglob('*')):
        if file.suffix.lower() in OUTPUT_SUFFIXES:
            print(f"- {file.relative_to(result_root)}")


In [None]:
# Set up paths
WORKSPACE_DIR = Path('/workspace')
INPUT_DIR = WORKSPACE_DIR / 'input'
OUTPUT_DIR = WORKSPACE_DIR / 'output'
SPLEETER_OUTPUT = OUTPUT_DIR / 'spleeter'
DEMUCS_OUTPUT = OUTPUT_DIR / 'demucs'

# Create directories. parents=True also creates WORKSPACE_DIR when it is
# missing; a plain mkdir() would raise FileNotFoundError in that case.
for directory in (INPUT_DIR, OUTPUT_DIR, SPLEETER_OUTPUT, DEMUCS_OUTPUT):
    directory.mkdir(parents=True, exist_ok=True)

# Collect candidates once (WAV first, then MP3). Each group is sorted because
# glob order is not guaranteed, and the first entry decides which file is used.
audio_files = sorted(INPUT_DIR.glob('*.wav')) + sorted(INPUT_DIR.glob('*.mp3'))

# List available audio files
print("Available audio files in input directory:")
for file in audio_files:
    print(f"- {file.name}")

# Select first available audio file
if not audio_files:
    raise ValueError("No audio files found in input directory!")
INPUT_FILE = audio_files[0]

# Load and verify audio file
# sr=None and duration=None are passed so nothing is resampled or truncated
y, sr = librosa.load(str(INPUT_FILE), sr=None, duration=None)  # Load full file
duration = librosa.get_duration(y=y, sr=sr)
print(f"\nSelected file: {INPUT_FILE.name}")
print(f"Audio file duration: {duration:.2f} seconds")
print(f"Sample rate: {sr} Hz")
print(f"Total samples: {len(y)}")


In [None]:
# Spleeter separation
# NOTE: this definition replaces the CLI-based, decorated run_spleeter from the
# earlier helper cells once this cell has been executed.
def run_spleeter():
    """Separate INPUT_FILE into four stems with Spleeter and report on the result.

    Prints the elapsed time for building the separator plus running the
    separation, then reloads every expected stem from
    ``SPLEETER_OUTPUT / INPUT_FILE.stem`` to print its duration, warning about
    any stem file that is missing. Returns None.
    """
    started = time.time()

    # Build the 4-stem separator
    splitter = Separator('spleeter:4stems')

    print("Starting Spleeter separation...")
    # duration=None is passed with the intent of processing the whole song
    splitter.separate_to_file(
        str(INPUT_FILE),
        str(SPLEETER_OUTPUT),
        codec='wav',
        duration=None,
    )

    elapsed = time.time() - started
    print(f"Spleeter processing time: {elapsed:.2f} seconds")

    # Check that each expected stem was written and report its length
    stems_dir = SPLEETER_OUTPUT / INPUT_FILE.stem
    for stem_name in ('vocals', 'drums', 'bass', 'other'):
        stem_file = stems_dir / f"{stem_name}.wav"
        if not stem_file.exists():
            print(f"Warning: {stem_name} stem file not found!")
            continue
        samples, rate = librosa.load(str(stem_file), sr=None)
        stem_seconds = librosa.get_duration(y=samples, sr=rate)
        print(f"Spleeter {stem_name} stem duration: {stem_seconds:.2f} seconds")

# Run Spleeter
run_spleeter()


In [None]:
# Demucs separation
def run_demucs():
    """Separate INPUT_FILE into stems with Demucs (htdemucs_ft) and save them as WAV.

    Stems are written to ``DEMUCS_OUTPUT / INPUT_FILE.stem / <stem>.wav``.
    The printed processing time covers model loading, separation and writing
    the stems; the duration check that re-reads each stem runs after the
    clock has stopped, so it does not inflate the reported time.

    Returns:
        None. Results are reported via ``print`` and the files on disk.
    """
    import soundfile as sf  # imported once here instead of on every loop iteration

    start_time = time.time()

    # Use the GPU when one is available, otherwise fall back to the CPU
    # instead of failing in an unconditional .cuda() call.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load the htdemucs_ft model
    model = get_model('htdemucs_ft')
    model.to(device)

    print("Starting Demucs separation...")

    # Load audio at the sample rate the model declares rather than a
    # hard-coded value, keeping all channels.
    sample_rate = model.samplerate
    wav, _ = librosa.load(str(INPUT_FILE), sr=sample_rate, mono=False)
    if wav.ndim == 1:
        wav = np.stack([wav, wav])  # Convert mono to stereo

    # Convert to torch tensor
    wav = torch.tensor(wav, dtype=torch.float32)

    # Apply the model; wav[None] adds the leading batch dimension and [0]
    # removes it again from the result.
    with torch.no_grad():
        sources = apply_model(model, wav[None], device=device, progress=True, num_workers=4)[0]

    # Create output directory
    output_dir = DEMUCS_OUTPUT / INPUT_FILE.stem
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save each stem. Stem names come from the model itself so they cannot
    # drift out of sync with the order of its output.
    saved_stems = []
    for source, name in zip(sources, model.sources):
        stem_path = output_dir / f"{name}.wav"
        # soundfile expects (frames, channels), hence the transpose
        sf.write(str(stem_path), source.cpu().numpy().T, sample_rate)
        saved_stems.append((name, stem_path))

    end_time = time.time()

    # Verify the saved files. Dedicated variable names keep this check from
    # overwriting the sample rate used for writing.
    for name, stem_path in saved_stems:
        stem_audio, stem_rate = librosa.load(str(stem_path), sr=None)
        print(f"Demucs {name} stem duration: {librosa.get_duration(y=stem_audio, sr=stem_rate):.2f} seconds")

    print(f"Demucs processing time: {end_time - start_time:.2f} seconds")

# Run Demucs
run_demucs()


In [None]:
# Compare output file sizes and durations
def compare_outputs():
    """Print size, duration and sample rate of each stem for both tools.

    For every stem name, looks for ``<stem>.wav`` under
    ``SPLEETER_OUTPUT / INPUT_FILE.stem`` and ``DEMUCS_OUTPUT / INPUT_FILE.stem``.
    When both files exist their size (MB), duration (s) and sample rate (Hz)
    are printed; otherwise a warning is printed for that stem. Returns None.
    """
    print("\nComparing outputs:")
    print("-" * 50)

    bytes_per_mb = 1024 * 1024
    track_name = INPUT_FILE.stem

    for stem in ('vocals', 'drums', 'bass', 'other'):
        stem_filename = f"{stem}.wav"
        spleeter_path = SPLEETER_OUTPUT / track_name / stem_filename
        demucs_path = DEMUCS_OUTPUT / track_name / stem_filename

        # Both files are needed for a side-by-side comparison
        if not (spleeter_path.exists() and demucs_path.exists()):
            print(f"\nWarning: Could not find both {stem} stem files for comparison")
            continue

        # File sizes in MB
        spleeter_mb = spleeter_path.stat().st_size / bytes_per_mb
        demucs_mb = demucs_path.stat().st_size / bytes_per_mb

        # Durations, measured at each file's own sample rate (sr=None)
        spleeter_audio, spleeter_rate = librosa.load(str(spleeter_path), sr=None)
        demucs_audio, demucs_rate = librosa.load(str(demucs_path), sr=None)
        spleeter_seconds = librosa.get_duration(y=spleeter_audio, sr=spleeter_rate)
        demucs_seconds = librosa.get_duration(y=demucs_audio, sr=demucs_rate)

        print(f"\n{stem.capitalize()} stem comparison:")
        print(f"Spleeter: {spleeter_mb:.2f}MB, {spleeter_seconds:.2f}s, {spleeter_rate}Hz")
        print(f"Demucs:  {demucs_mb:.2f}MB, {demucs_seconds:.2f}s, {demucs_rate}Hz")

# Run comparison
compare_outputs()
