In [17]:
import os
import librosa
import soundfile as sf
import subprocess


In [25]:
import random
import string
import time


def generate_random_filename(extension=".wav", length=8):
    """
    Build a file stem of the form ``<random chars>_<unix timestamp>``.

    Args:
        extension (str): Accepted for call compatibility but not used; the
            returned name carries no extension.
        length (int): Number of random characters in the leading part.

    Returns:
        str: ``length`` random lowercase letters/digits, an underscore, and the
        current Unix time truncated to whole seconds.
    """
    created_at = int(time.time())
    alphabet = string.ascii_lowercase + string.digits
    token = ''.join(random.choices(alphabet, k=length))
    return f"{token}_{created_at}"

In [18]:
def convert_mp3_to_16k_wav(input_mp3_path: str, output_dir: str, output_filename: str = None) -> str:
    """
    Load an audio file with librosa at 16 kHz and write it out as a WAV file.

    Args:
        input_mp3_path (str): Path of the audio file to load.
        output_dir (str): Directory for the WAV file; created if missing.
        output_filename (str, optional): Stem for the output file. When None,
            the input file's base name without its extension is used.

    Returns:
        str: Path of the written file, ``<output_dir>/<stem>_16k.wav``.
    """
    target_sr = 16000
    os.makedirs(output_dir, exist_ok=True)

    stem = output_filename
    if stem is None:
        # Fall back to the input's base name with its extension stripped.
        stem, _ext = os.path.splitext(os.path.basename(input_mp3_path))

    wav_path = os.path.join(output_dir, f"{stem}_16k.wav")

    # The rate returned by librosa is not needed: it was requested explicitly.
    samples, _ = librosa.load(input_mp3_path, sr=target_sr)
    sf.write(wav_path, samples, samplerate=target_sr)

    return wav_path

In [19]:
def run_deepfilter_with_output_dir(input_wav_path: str, output_dir: str) -> str:
    """
    Run the DeepFilterNet CLI on a WAV file and return the path of the file it wrote.

    The CLI picks the output file name itself, so no fixed suffix is assumed:
    once the command succeeds, ``output_dir`` is searched for
    ``<input stem>_*.wav`` and the most recently modified match is returned.
    NOTE(review): upstream DeepFilterNet appears to append the model name
    (e.g. ``_DeepFilterNet3``) rather than ``_enhanced`` by default - confirm
    against the installed version.

    Args:
        input_wav_path (str): Path to the input WAV file.
        output_dir (str): Directory for the enhanced WAV; created if missing.

    Returns:
        str: Path to an existing enhanced WAV file inside ``output_dir``.

    Raises:
        subprocess.CalledProcessError: If ``deepFilter`` exits with a non-zero status.
        FileNotFoundError: If the command succeeded but no matching output file exists.
    """
    os.makedirs(output_dir, exist_ok=True)

    command = [
        "deepFilter",
        input_wav_path,
        "--output-dir", output_dir
    ]
    subprocess.run(command, check=True)

    base_name = os.path.splitext(os.path.basename(input_wav_path))[0]
    # The trailing underscore keeps the input itself (e.g. "<stem>.wav" sitting
    # in the same directory) from being mistaken for the enhanced output.
    prefix = f"{base_name}_"
    candidates = [
        os.path.join(output_dir, entry)
        for entry in os.listdir(output_dir)
        if entry.startswith(prefix) and entry.lower().endswith(".wav")
    ]
    if not candidates:
        raise FileNotFoundError(
            f"deepFilter finished but no '{prefix}*.wav' file was found in {output_dir!r}"
        )

    # The file that was just written is the newest one carrying this prefix.
    return max(candidates, key=os.path.getmtime)

In [20]:
def preprocess_audio(input_mp3_path: str, output_dir: str, output_filename: str = None) -> str:
    """
    Preprocesses an MP3 audio file by converting to 16kHz WAV, applying DeepFilterNet,
    and returning the enhanced audio path. Deletes the intermediate unfiltered WAV,
    including when the DeepFilterNet step fails.

    Args:
        input_mp3_path (str): Path to the MP3 file.
        output_dir (str): Directory to store the final enhanced WAV file.
        output_filename (str, optional): Stem for the generated files. When None,
            the input's base name is used, so inputs that share a base name
            (e.g. many ``.../vocals.mp3``) overwrite each other in ``output_dir``;
            pass a unique stem per input to keep every result.

    Returns:
        str: Path to the final filtered/enhanced WAV file.

    Raises:
        Exception: Whatever the conversion or DeepFilterNet step raises is
            propagated unchanged (after the intermediate WAV is cleaned up).
    """
    print(f"🔄 Preprocessing: {input_mp3_path}")

    # Step 1: Convert MP3 to 16kHz WAV
    wav_path = convert_mp3_to_16k_wav(input_mp3_path, output_dir, output_filename)

    try:
        # Step 2: Apply DeepFilterNet
        enhanced_wav_path = run_deepfilter_with_output_dir(wav_path, output_dir)
    finally:
        # Step 3: Remove the intermediate WAV file. Done in ``finally`` so a
        # failed enhancement does not leave the temporary file behind.
        if os.path.exists(wav_path):
            os.remove(wav_path)
            print(f"🗑️ Removed intermediate file: {wav_path}")

    print(f"✅ Final enhanced file: {enhanced_wav_path}")
    return enhanced_wav_path

In [3]:
import glob

In [7]:
# Shell out to list one separation output folder and see which files it contains.
!ls vocals/mdx_extra_q/2g1u3998_1743335826

no_vocals.mp3  vocals.mp3


In [10]:
# Collect the vocals.mp3 paths found under each of the three source folders,
# one list per folder.
vocal_0, vocal_1, vocal_2 = (
    glob.glob(pattern)
    for pattern in (
        'vocals/mdx_extra_q/*/vocals.mp3',
        'vocal_1/mdx_extra_q/*/vocals.mp3',
        'vocal_2/mdx_extra_q/*/vocals.mp3',
    )
)

In [14]:
# Single flat list of every vocals.mp3 path, in folder order.
files_name = [*vocal_0, *vocal_1, *vocal_2]

In [28]:
# !deepFilter -i filter_waves/vocals_16k.wav -o clean_final

In [16]:
# len(files_name)

In [21]:
# Run the full convert -> enhance pipeline on a single track.
preprocessed_file = preprocess_audio(
    input_mp3_path="vocals/mdx_extra_q/2g1u3998_1743335826/vocals.mp3",
    output_dir="filter_waves/",
)

🔄 Preprocessing: vocals/mdx_extra_q/2g1u3998_1743335826/vocals.mp3


  from torchaudio.backend.common import AudioMetaData
fatal: not a git repository (or any of the parent directories): .git


2025-03-31 07:22:04 | INFO     | DF | Running on torch 2.6.0+cu124
2025-03-31 07:22:04 | INFO     | DF | Running on host Ubuntu-2404-noble-amd64-base
2025-03-31 07:22:04 | INFO     | DF | Loading model settings of DeepFilterNet3
2025-03-31 07:22:04 | INFO     | DF | Using DeepFilterNet3 model at /root/.cache/DeepFilterNet/DeepFilterNet3
2025-03-31 07:22:04 | INFO     | DF | Initializing model `deepfilternet3`
2025-03-31 07:22:05 | INFO     | DF | Found checkpoint /root/.cache/DeepFilterNet/DeepFilterNet3/checkpoints/model_120.ckpt.best with epoch 120
2025-03-31 07:22:05 | INFO     | DF | Running on device cuda:0
2025-03-31 07:22:05 | INFO     | DF | Model loaded


  return ta_resample(audio, orig_sr, new_sr, **params)
Traceback (most recent call last):
  File "/root/anaconda3/envs/research_env/bin/deepFilter", line 8, in <module>
    sys.exit(run())
  File "/root/anaconda3/envs/research_env/lib/python3.10/site-packages/df/enhance.py", line 378, in run
    main(args)
  File "/root/anaconda3/envs/research_env/lib/python3.10/site-packages/df/enhance.py", line 78, in main
    audio = enhance(
  File "/root/anaconda3/envs/research_env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/root/anaconda3/envs/research_env/lib/python3.10/site-packages/df/enhance.py", line 235, in enhance
    enhanced = model(spec.clone(), erb_feat, spec_feat)[0].cpu()
  File "/root/anaconda3/envs/research_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/root/anaconda3/envs/research_env/lib/python3.

CalledProcessError: Command '['deepFilter', 'filter_waves/vocals_16k.wav', '--output-dir', 'filter_waves/']' returned non-zero exit status 1.

In [29]:
import os
import subprocess

def run_deepfilter_with_output_dir(input_wav_path: str, output_dir: str):
    """
    Runs DeepFilterNet (via CLI) on a WAV file and saves the enhanced file in a given output directory.

    The CLI picks the output file name itself, so no fixed suffix is assumed:
    once the command succeeds, ``output_dir`` is searched for
    ``<input stem>_*.wav`` and the most recently modified match is returned.
    NOTE(review): upstream DeepFilterNet appears to append the model name
    (e.g. ``_DeepFilterNet3``) rather than ``_enhanced`` by default - confirm
    against the installed version.

    Args:
        input_wav_path (str): Path to the input WAV file.
        output_dir (str): Directory to store the enhanced WAV output; created if missing.

    Returns:
        str: Path to the enhanced WAV file (guaranteed to exist).

    Raises:
        subprocess.CalledProcessError: If ``deepFilter`` exits with a non-zero status.
        FileNotFoundError: If the command succeeded but no matching output file exists.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Run deepFilter command with custom output directory
    command = [
        "deepFilter",
        input_wav_path,
        "--output-dir", output_dir
    ]

    print(f"🚀 Running DeepFilterNet on: {input_wav_path}")
    subprocess.run(command, check=True)

    # Locate the file that was actually written instead of guessing its name.
    base_name = os.path.splitext(os.path.basename(input_wav_path))[0]
    # The trailing underscore keeps the input itself (e.g. "<stem>.wav" sitting
    # in the same directory) from being mistaken for the enhanced output.
    prefix = f"{base_name}_"
    candidates = [
        os.path.join(output_dir, entry)
        for entry in os.listdir(output_dir)
        if entry.startswith(prefix) and entry.lower().endswith(".wav")
    ]
    if not candidates:
        raise FileNotFoundError(
            f"deepFilter finished but no '{prefix}*.wav' file was found in {output_dir!r}"
        )

    # The file that was just written is the newest one carrying this prefix.
    output_path = max(candidates, key=os.path.getmtime)

    print(f"✅ Saved enhanced file to: {output_path}")
    return output_path


In [30]:
def convert_mp3_to_16k_wav(input_mp3_path: str, output_dir: str, output_filename: str = None) -> str:
    """
    Read an audio file through librosa at a 16000 Hz sampling rate and save it as WAV.

    Args:
        input_mp3_path (str): Audio file to read.
        output_dir (str): Destination directory (created when absent).
        output_filename (str, optional): Output stem; defaults to the input
            file's base name with the extension removed.

    Returns:
        str: Location of the saved file, ``<output_dir>/<stem>_16k.wav``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Default the output stem to the input file's name minus its extension.
    name_stem = (
        os.path.splitext(os.path.basename(input_mp3_path))[0]
        if output_filename is None
        else output_filename
    )
    destination = os.path.join(output_dir, f"{name_stem}_16k.wav")

    waveform, _ = librosa.load(input_mp3_path, sr=16000)
    sf.write(destination, waveform, samplerate=16000)
    return destination

In [31]:
# Convert one track under a randomly generated stem; the returned path is the cell output.
convert_mp3_to_16k_wav(
    input_mp3_path='vocal_2/mdx_extra_q/0dz5hdnw_1743334548/vocals.mp3',
    output_dir='preprocess_waves',
    output_filename=generate_random_filename(),
)

'preprocess_waves/w4w31k34_1743399043_16k.wav'

In [43]:
# run_deepfilter_with_output_dir('preprocess_waves/w4w31k34_1743399043_16k.wav','preprocess_waves')

In [38]:
import librosa

def get_audio_duration(audio_path: str) -> float:
    """
    Returns the duration of an audio file in seconds.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        float: Duration in seconds, or 0.0 if the file could not be read
        (the error is printed rather than raised).
    """
    try:
        audio, sr = librosa.load(audio_path, sr=None)  # Load with original sampling rate
        # samples / (samples per second) is already seconds. The earlier extra
        # "/ 60.0" produced minutes while the message and docstring say seconds.
        duration = len(audio) / sr
        print(f"⏱️ Duration of {audio_path}: {duration:.2f} seconds")
        return duration
    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to 0.0.
        print(f"❌ Error reading audio: {e}")
        return 0.0