In [1]:
# ==============================================================================
# CELL 1: DEMUCS INSTALLATION
# The official Demucs repository handles its own dependencies.
# This command will install the Demucs package and everything it needs.
# ==============================================================================
print("--- Installing Demucs and its dependencies ---")
# The '-U' flag ensures we get the latest version of the package.
!python -m pip install -U demucs --quiet
print("Installation complete.")

# Verify the installation by checking the version.
!demucs --version

--- Installing Demucs and its dependencies ---
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.1/87.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.6/59.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.7/249.7 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K 

In [2]:
# ==============================================================================
# CELL 2: IMPORTS AND DATA PREPARATION
# ==============================================================================
import os
import torch
import torchaudio
import shutil
from google.colab import drive

print("--- Preparing environment and data ---")
# Mount Google Drive so the dataset stored there is reachable from the runtime.
drive.mount('/content/drive')

# Copy the degraded dataset from Google Drive onto the runtime's local disk.
source_dir = "/content/drive/MyDrive/degraded"
target_dir = "/content/degraded_dataset"
if not os.path.exists(source_dir):
    print(f"WARNING: Source folder not found at {source_dir}")
elif os.path.exists(target_dir):
    # Skip the copy when an earlier run already produced the local folder.
    # NOTE(review): an interrupted earlier copy would also leave this folder
    # behind; delete it manually to force a fresh copy.
    print("Degraded dataset already exists in Colab runtime.")
else:
    # Announce the copy only when it actually happens, so the log is truthful.
    print("Copying degraded dataset folder...")
    shutil.copytree(source_dir, target_dir)

# Prepare the output directory for Demucs results
output_dir = "/content/demucs_output"
os.makedirs(output_dir, exist_ok=True)

--- Preparing environment and data ---
Mounted at /content/drive
Copying degraded dataset folder...


In [3]:
# ==============================================================================
# CELL 3: PHASE A - ZERO-SHOT TEST
# Here, we use a pre-trained Demucs model as an advanced cleaning filter.
# ==============================================================================
print("\n--- Phase A: Running Zero-Shot Inference with Demucs ---")

# Select a test file from our degraded dataset
test_input_file = "/content/degraded_dataset/007011 (1).mp3"
if os.path.exists(test_input_file):
    # The Demucs command-line interface is very straightforward.
    # `htdemucs` is the default model, which is very high-quality.
    # `-o` specifies the output directory.
    # The `--two-stems vocals` flag is a robust shortcut that separates the track
    # into just two stems: 'vocals' and 'no_vocals' (the instrumental).
    # Without this flag, it defaults to 4 stems (drums, bass, other, vocals).

    print(f"Applying Demucs to {os.path.basename(test_input_file)}...")
    !python -m demucs --mp3 --two-stems vocals -o "{output_dir}" "{test_input_file}"

    # --- Reconstruct the Cleaned Signal ---
    # Demucs creates a subdirectory inside 'output_dir' named after the model used.
    model_name = "htdemucs" # The default model's name
    # The output path for the stems of our specific test file
    output_subfolder = os.path.join(output_dir, model_name, os.path.splitext(os.path.basename(test_input_file))[0])

    if os.path.exists(output_subfolder):
        print("\nReconstructing the cleaned audio from separated stems...")
        # Load the two separated stems (vocals and the instrumental accompaniment)
        try:
            vocals, sr = torchaudio.load(os.path.join(output_subfolder, "vocals.mp3"))
            no_vocals, _ = torchaudio.load(os.path.join(output_subfolder, "no_vocals.mp3"))

            # Re-add the stems to get the full, cleaned signal
            cleaned_signal = vocals + no_vocals

            # Save the final result
            final_output_path = "/content/demucs_cleaned_output.mp3"
            torchaudio.save(final_output_path, cleaned_signal, sr)

            print("\n--- ZERO-SHOT TEST SUCCESSFUL ---")
            print(f"Cleaned audio file saved to: {final_output_path}")
            print("Listen to this file and compare it to the input. It should have fewer artifacts.")

        except Exception as e:
            print(f"Error during reconstruction: {e}")
            print("Please check the output folder for the separated files.")
    else:
        print(f"ERROR: Demucs output folder not found at {output_subfolder}")
else:
    print(f"ERROR: Test file not found at {test_input_file}")


--- Phase A: Running Zero-Shot Inference with Demucs ---
Applying Demucs to 007011 (1).mp3...
[1mImportant: the default model was recently changed to `htdemucs`[0m the latest Hybrid Transformer Demucs model. In some cases, this model can actually perform worse than previous models. To get back the old default model use `-n mdx_extra_q`.
Downloading: "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th" to /root/.cache/torch/hub/checkpoints/955717e8-8726e21a.th
100% 80.2M/80.2M [00:00<00:00, 106MB/s]
Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in /content/demucs_output/htdemucs
Separating track /content/degraded_dataset/007011 (1).mp3
100%|██████████████████████████████████████████████| 35.099999999999994/35.099999999999994 [01:55<00:00,  3.30s/seconds]

Reconstructing the cleaned audio from separated stems...

--- ZERO-SHOT TEST SUCCESSFUL ---
Cleaned audio file saved to: /content/demu

In [10]:
# ==============================================================================
# CELL 4: COMPARING AUDIOS
# ==============================================================================

import librosa
import numpy as np
import os

def get_audio_identity_card(file_path):
    """
    Summarise an audio file as a small "identity card" dictionary.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A dict with the file's base name, its sample rate, and pre-formatted
        strings for duration, average RMS, average spectral centroid, average
        spectral bandwidth and average zero-crossing rate.
        If the path does not exist, or anything fails while loading or
        analysing the file, a dict with a single "error" key is returned
        instead of raising.
    """
    if not os.path.exists(file_path):
        return {"error": "File not found"}

    try:
        # sr=None asks librosa to keep the file's own sample rate.
        samples, rate = librosa.load(file_path, sr=None)

        length_s = librosa.get_duration(y=samples, sr=rate)

        # Each feature is computed frame by frame, then averaged to one number.
        loudness = np.mean(librosa.feature.rms(y=samples))
        brightness = np.mean(librosa.feature.spectral_centroid(y=samples, sr=rate))
        richness = np.mean(librosa.feature.spectral_bandwidth(y=samples, sr=rate))
        noisiness = np.mean(librosa.feature.zero_crossing_rate(y=samples))

        # Fill the card field by field; insertion order is the display order.
        card = {}
        card["File Path"] = os.path.basename(file_path)
        card["Sample Rate (Hz)"] = rate
        card["Duration (s)"] = f"{length_s:.2f}"
        card["Loudness (Avg. RMS)"] = f"{loudness:.4f}"
        card["Brightness (Avg. Spectral Centroid)"] = f"{brightness:.2f} Hz"
        card["Richness (Avg. Spectral Bandwidth)"] = f"{richness:.2f} Hz"
        card["Noisiness (Avg. Zero-Crossing Rate)"] = f"{noisiness:.4f}"
        return card

    except Exception as exc:
        return {"error": f"Could not process file: {exc}"}

# --- Compare the files through their identity cards ---

original_degraded_file = "/content/degraded_dataset/007011 (1).mp3"
original_cleaned_file = "/content/007011.mp3"
demucs_file = "/content/demucs_cleaned_output.mp3"

# (heading, path) pairs, in the order they are reported.
files_to_compare = [
    ("1. Original Degraded Audio:", original_degraded_file),
    ("2. Original Cleaned Audio:", original_cleaned_file),
    ("3. Demucs-Cleaned Audio:", demucs_file),
]

# Every card is kept, keyed by file path, so none of them is overwritten and
# all three stay available for later inspection.
identity_cards = {}

print("--- Audio Identity Cards ---")
for heading, audio_path in files_to_compare:
    print(f"\n{heading}")
    card = get_audio_identity_card(audio_path)
    identity_cards[audio_path] = card
    for key, value in card.items():
        print(f"  - {key}: {value}")

# Names kept for any cell that still reads them. `card_original` previously
# ended up holding the card of `original_cleaned_file` (it was assigned twice),
# so that final value is preserved here.
card_original = identity_cards[original_cleaned_file]
card_cleaned = identity_cards[demucs_file]

--- Audio Identity Cards ---

1. Original Degraded Audio:
  - File Path: 007011 (1).mp3
  - Sample Rate (Hz): 32000
  - Duration (s): 29.98
  - Loudness (Avg. RMS): 0.1656
  - Brightness (Avg. Spectral Centroid): 1159.65 Hz
  - Richness (Avg. Spectral Bandwidth): 1335.14 Hz
  - Noisiness (Avg. Zero-Crossing Rate): 0.0567

1. Original Cleaned Audio:
  - File Path: 007011.mp3
  - Sample Rate (Hz): 44100
  - Duration (s): 29.98
  - Loudness (Avg. RMS): 0.1830
  - Brightness (Avg. Spectral Centroid): 1840.27 Hz
  - Richness (Avg. Spectral Bandwidth): 3618.82 Hz
  - Noisiness (Avg. Zero-Crossing Rate): 0.0735

2. Demucs-Cleaned Audio:
  - File Path: demucs_cleaned_output.mp3
  - Sample Rate (Hz): 44100
  - Duration (s): 30.01
  - Loudness (Avg. RMS): 0.1559
  - Brightness (Avg. Spectral Centroid): 1159.09 Hz
  - Richness (Avg. Spectral Bandwidth): 1360.62 Hz
  - Noisiness (Avg. Zero-Crossing Rate): 0.0414


# FINE TUNING

In [11]:
# ==============================================================================
# CELL 5: STAGE 1 - ORGANIZING DATA FOR FINE-TUNING
# ==============================================================================
import os
import shutil
from tqdm.notebook import tqdm

print("--- Step 1: Organizing data into the structure required by Demucs ---")

# Define the main paths
base_finetune_dir = "/content/demucs_finetune_data"
train_dir = os.path.join(base_finetune_dir, "train")
valid_dir = os.path.join(base_finetune_dir, "valid")

# Create the directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(valid_dir, exist_ok=True)

# Paths to our source data
degraded_dir = "/content/degraded_dataset"
source_hq_colab_dir = "/content/source_hq_music"

# Ensure the source HQ data is copied over (skipped when already present locally)
if not os.path.exists(source_hq_colab_dir):
    source_hq_drive_dir = "/content/drive/MyDrive/007" # Or your HQ music folder name
    if os.path.exists(source_hq_drive_dir):
        # Announce the copy only when it actually happens.
        print("Copying HQ source dataset folder...")
        shutil.copytree(source_hq_drive_dir, source_hq_colab_dir)
    else:
        print(f"FATAL: HQ source folder not found at {source_hq_drive_dir}")

# Create a list of all file pairs
# We assume filenames match between the degraded and hq folders.
# os.listdir returns entries in arbitrary order, so sort them: otherwise the
# train/validation split below could differ from one run to the next.
all_files = sorted(f for f in os.listdir(degraded_dir) if f.endswith('.mp3'))

# Split into a training and validation set (e.g., 90% / 10%)
split_index = int(len(all_files) * 0.9)
train_files = all_files[:split_index]
valid_files = all_files[split_index:]

print(f"Total files: {len(all_files)}. Training set size: {len(train_files)}. Validation set size: {len(valid_files)}.")

def organize_set(file_list, target_dir):
    """
    Copies files into the expected structure:
    target_dir/
    |-- mixtures/
    |   `-- song.mp3  (our degraded audio)
    `-- sources/
        `-- song/
            `-- mixture.wav (our clean audio, renamed to 'mixture.wav')

    Degraded files are read from the module-level `degraded_dir` and clean
    files from the module-level `source_hq_colab_dir`; a clean file is matched
    to a degraded one by identical filename.

    Args:
        file_list: Filenames (not full paths) of the degraded files to place
            in this set.
        target_dir: Root folder of the set (e.g. the train or valid folder).

    Returns:
        The list of filenames for which no clean counterpart was found. Those
        degraded files are still copied into 'mixtures/', but get no entry
        under 'sources/'.
    """
    mixtures_path = os.path.join(target_dir, "mixtures")
    sources_path = os.path.join(target_dir, "sources")
    os.makedirs(mixtures_path, exist_ok=True)
    os.makedirs(sources_path, exist_ok=True)

    missing_clean = []

    for filename in tqdm(file_list, desc=f"Organizing {os.path.basename(target_dir)} set"):
        basename = os.path.splitext(filename)[0]

        # Copy the degraded file
        shutil.copy(os.path.join(degraded_dir, filename), os.path.join(mixtures_path, filename))

        # Locate the clean file; remember the miss instead of skipping silently.
        source_file_path = os.path.join(source_hq_colab_dir, filename)
        if not os.path.exists(source_file_path):
            missing_clean.append(filename)
            continue

        target_source_folder = os.path.join(sources_path, basename)
        os.makedirs(target_source_folder, exist_ok=True)
        # The training script expects a 'mixture.wav' file as the clean target.
        # NOTE(review): shutil.copy does not transcode -- the file written here
        # keeps the bytes of the source file and only receives a '.wav' name.
        # Confirm the consumer can read it, or convert the audio instead.
        shutil.copy(source_file_path, os.path.join(target_source_folder, "mixture.wav"))

    if missing_clean:
        # Surface unpaired files: a name mismatch between the two folders
        # (for example a ' (1)' suffix on one side) would otherwise leave the
        # set without clean targets and nobody would notice.
        preview = ", ".join(missing_clean[:5])
        print(
            f"WARNING: {len(missing_clean)} of {len(file_list)} files in the "
            f"{os.path.basename(target_dir)} set have no clean counterpart in "
            f"{source_hq_colab_dir} (first ones: {preview})"
        )

    return missing_clean

# Organize both the training and validation sets
# Build the on-disk layout for each split in turn: training first, then validation.
for split_files, split_dir in ((train_files, train_dir), (valid_files, valid_dir)):
    organize_set(split_files, split_dir)

print("\n--- Data organization complete. ---")

--- Step 1: Organizing data into the structure required by Demucs ---
Copying HQ source dataset folder...
Total files: 37. Training set size: 33. Validation set size: 4.


Organizing train set:   0%|          | 0/33 [00:00<?, ?it/s]

Organizing valid set:   0%|          | 0/4 [00:00<?, ?it/s]


--- Data organization complete. ---


In [12]:
# ==============================================================================
# CELL 6: STAGE 2 - CREATING THE CONFIGURATION FILE (YAML)
# ==============================================================================
import yaml

# Configuration dictionary
# We start from the 'htdemucs' config and modify it for our task
config = {
    'dset': {
        'train': str(train_dir),
        'valid': str(valid_dir),
        'ext': '.mp3',
        'sample_rate': 44100,
        'channels': 1, # We are working with mono audio
        'segment': 8, # Use 8-second segments for training
        'shift': 2,
    },
    'optim': {
        'lr': 1e-4, # Learning rate
        'epochs': 20, # Number of epochs, increase for a full training run
        'batch_size': 8, # Adjust based on GPU memory
        'num_workers': 2,
    },
    'model': 'htdemucs', # The base model we are fine-tuning
    'name': 'denoising_finetune', # A name for our experiment
    'continue_from': 'htdemucs', # Explicitly tells the script to start from htdemucs pre-trained weights
    'save_path': '/content/drive/MyDrive/demucs_experiments/'
}

# Write the dictionary to a YAML file
config_path = "/content/denoiser_finetune.yaml"
with open(config_path, 'w') as f:
    yaml.dump(config, f)

print(f"--- Configuration file created at {config_path} ---")
!cat {config_path} # Display the file content to verify

--- Configuration file created at /content/denoiser_finetune.yaml ---
continue_from: htdemucs
dset:
  channels: 1
  ext: .mp3
  sample_rate: 44100
  segment: 8
  shift: 2
  train: /content/demucs_finetune_data/train
  valid: /content/demucs_finetune_data/valid
model: htdemucs
name: denoising_finetune
optim:
  batch_size: 8
  epochs: 20
  lr: 0.0001
  num_workers: 2
save_path: /content/drive/MyDrive/demucs_experiments/


In [None]:
# ==============================================================================
# CELL 7: STAGE 3 - LAUNCHING THE TRAINING
# WARNING: This is a long process and may use up your Colab resources.
# ==============================================================================
print("\n--- Step 3: Starting the Fine-Tuning Process ---")
print("This will take a long time. Monitor the output for progress.")

# Launch the fine-tuning run through the Demucs command-line entry point,
# passing the YAML file written to `config_path` by the configuration cell.
# The '-d' flag specifies the device (GPU).
# NOTE(review): `python -m demucs` is the same entry point the zero-shot cell
# uses for separation. Confirm that it accepts `--train` and `-f`, and that it
# reads this YAML layout, before starting a long run -- the Demucs training
# documentation should be checked for the supported way to launch training.
!python -m demucs -d cuda --train -f "{config_path}"