<a href="https://colab.research.google.com/github/kimdonggyu2008/deep_daiv_-/blob/main/Hybrid_Demucs_Music_Source_Separation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hybrid Demucs from Colab

This supports the Demucs source separation model (https://github.com/facebookresearch/demucs/)
This is only for separation with pre-trained models, not training!

You can either upload files manually (slow) or link your Google Drive account.

In [1]:
#!python3 -m pip install -U git+https://github.com/facebookresearch/demucs#egg=demucs

In [2]:
!pip install demucs



In [3]:
import demucs.separate
import numpy as np
import librosa, soundfile
import pandas as pd
import os

In [4]:
# Please BE VERY CAREFUL, this will link your entire drive.
# So don't edit code, except the one that says 'Customize the following options',
# or you might mess up your files.
# IF YOU DO NO WANT TO LINK DRIVE, please see below for an alternative!
#from google.colab import drive
#drive.mount('/gdrive')

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Customize the following options!
model = "htdemucs"
extensions = ["mp3", "wav", "ogg", "flac"]  # we will look for all those file types.
two_stems = None   # only separate one stems from the rest, for instance
# two_stems = "vocals"

# Options for the output audio.
mp3 = True
mp3_rate = 320
float32 = False  # output as float 32 wavs, unsused if 'mp3' is True.
int24 = False    # output as int24 wavs, unused if 'mp3' is True.
# You cannot set both `float32 = True` and `int24 = True` !!

in_path = '/content/drive/MyDrive/제목없는 폴더/'
out_path = '/content/drive/MyDrive/제목없는 폴더'

In [11]:
#@title Useful functions, don't forget to execute
import io
from pathlib import Path
import select
from shutil import rmtree
import subprocess as sp
import sys
from typing import Dict, Tuple, Optional, IO

from google.colab import files

def find_files(in_path):
    out = []
    for file in Path(in_path).iterdir():
        if file.suffix.lower().lstrip(".") in extensions:
            out.append(file)
    return out

def copy_process_streams(process: sp.Popen):
    def raw(stream: Optional[IO[bytes]]) -> IO[bytes]:
        assert stream is not None
        if isinstance(stream, io.BufferedIOBase):
            stream = stream.raw
        return stream

    p_stdout, p_stderr = raw(process.stdout), raw(process.stderr)
    stream_by_fd: Dict[int, Tuple[IO[bytes], io.StringIO, IO[str]]] = {
        p_stdout.fileno(): (p_stdout, sys.stdout),
        p_stderr.fileno(): (p_stderr, sys.stderr),
    }
    fds = list(stream_by_fd.keys())

    while fds:
        # `select` syscall will wait until one of the file descriptors has content.
        ready, _, _ = select.select(fds, [], [])
        for fd in ready:
            p_stream, std = stream_by_fd[fd]
            raw_buf = p_stream.read(2 ** 16)
            if not raw_buf:
                fds.remove(fd)
                continue
            buf = raw_buf.decode()
            std.write(buf)
            std.flush()

def silence_below_threshold(audio_data, threshold):
    # 절대값이 threshold보다 작은 값은 0으로 변환
    audio_data[np.abs(audio_data) < threshold] = 0
    return audio_data

# def separate(inp=None, outp=None):
#     inp = inp or in_path
#     outp = outp or out_path
#     cmd = ["python3", "-m", "demucs.separate", "-o", str(outp), "-n", model]
#     if mp3:
#         cmd += ["--mp3", f"--mp3-bitrate={mp3_rate}"]
#     if float32:
#         cmd += ["--float32"]
#     if int24:
#         cmd += ["--int24"]
#     if two_stems is not None:
#         cmd += [f"--two-stems={two_stems}"]
#     files = [str(f) for f in find_files(inp)]
#     if not files:
#         print(f"No valid audio files in {in_path}")
#         return
#     print("Going to separate the files:")
#     print('\n'.join(files))
#     print("With command: ", " ".join(cmd))
#     p = sp.Popen(cmd + files, stdout=sp.PIPE, stderr=sp.PIPE)
#     copy_process_streams(p)
#     p.wait()
#     if p.returncode != 0:
#         print("Command failed, something went wrong.")


# def from_upload():
#     out_path = Path('separated')
#     in_path = Path('tmp_in')

#     if in_path.exists():
#         rmtree(in_path)
#     in_path.mkdir()

#     if out_path.exists():
#         rmtree(out_path)
#     out_path.mkdir()

#     uploaded = files.upload()
#     for name, content in uploaded.items():
#         (in_path / name).write_bytes(content)
#     separate(in_path, out_path)

# 보컬과 배경음악만 분리하도록 two_stems 옵션을 추가하여 수정
def separate_vocals_and_instrumental(inp=None, outp=None):
    inp = inp or in_path
    outp = outp or out_path
    model = 'htdemucs'  # 사용할 모델 (예: htdemucs)
    two_stems = 'vocals'  # 두 개의 stem으로 나누기: 보컬과 그 외
    cmd = ["python3", "-m", "demucs.separate", "-o", str(outp), "-n", model]

    # 두 스템 옵션을 추가 (보컬과 인스트루멘탈)
    cmd += [f"--two-stems={two_stems}"]

    # 입력 오디오 파일을 찾기
    files = [str(f) for f in find_files(inp)]
    if not files:
        print(f"No valid audio files in {in_path}")
        return

    # 실행할 명령어 출력
    print("Going to separate the files:")
    print('\n'.join(files))
    print("With command: ", " ".join(cmd))

    # 명령어 실행
    p = sp.Popen(cmd + files, stdout=sp.PIPE, stderr=sp.PIPE)
    copy_process_streams(p)
    p.wait()

    if p.returncode != 0:
        print("Command failed, something went wrong.")

# 파일 업로드 후 보컬과 배경음악 분리 실행
def from_upload_vocals_and_instrumental():
    out_path = Path('separated')
    in_path = Path('tmp_in')

    if in_path.exists():
        rmtree(in_path)
    in_path.mkdir()

    if out_path.exists():
        rmtree(out_path)
    out_path.mkdir()

    uploaded = files.upload()
    for name, content in uploaded.items():
        (in_path / name).write_bytes(content)

    # 보컬과 인스트루멘탈만 분리 실행
    separate_vocals_and_instrumental(in_path, out_path)


In [12]:
import numpy as np
import wave
import struct
from scipy.signal import butter, lfilter
from pathlib import Path

# WAV 파일을 읽어들이는 함수
def read_wav(filename):
    with wave.open(filename, 'rb') as wav_file:
        params = wav_file.getparams()  # 오디오 파일의 매개변수
        n_channels, sampwidth, framerate, n_frames = params[:4]

        # 오디오 데이터를 읽어서 numpy 배열로 변환
        frames = wav_file.readframes(n_frames * n_channels)
        out = struct.unpack_from("%dh" % n_frames * n_channels, frames)
        out = np.array(out)

        return out, params

# WAV 파일을 쓰는 함수
def write_wav(filename, audio_data, params):
    n_channels, sampwidth, framerate, n_frames = params[:4]

    with wave.open(filename, 'wb') as wav_file:
        wav_file.setparams(params)

        # numpy 배열을 bytes로 변환하여 파일에 씀
        frames = struct.pack("%dh" % len(audio_data), *audio_data)
        wav_file.writeframes(frames)

# 고역 차단 필터를 적용하는 함수
def high_pass_filter(audio_data, cutoff, fs, order=5):
    nyquist = 0.5 * fs
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='high', analog=False)
    filtered_audio = lfilter(b, a, audio_data)
    return filtered_audio

# 분리된 보컬에만 20Hz 이하 소리 제거를 적용하는 함수
def apply_high_pass_filter_to_vocals(output_folder, cutoff=20):
    # 보컬 파일에 대해 필터 적용
    for audio_file in Path(output_folder).glob("**/vocals.wav"):
        print(f"Processing vocals file: {audio_file}")
        audio_data, params = read_wav(audio_file)
        framerate = params[2]

        # 고역 차단 필터 적용 (cutoff 주파수는 20Hz)
        filtered_audio = high_pass_filter(audio_data, cutoff, framerate)
        write_wav(audio_file, filtered_audio.astype(np.int16), params)

# 보컬과 배경음악만 분리하고 보컬에만 20Hz 이하 소리를 제거하는 기능 추가
def separate_vocals_and_instrumental(inp=None, outp=None, cutoff=20):
    inp = inp or in_path
    outp = outp or out_path
    model = 'htdemucs'  # 사용할 모델 (예: htdemucs)
    two_stems = 'vocals'  # 두 개의 stem으로 나누기: 보컬과 그 외
    cmd = ["python3", "-m", "demucs.separate", "-o", str(outp), "-n", model]

    # 두 스템 옵션을 추가 (보컬과 인스트루멘탈)
    cmd += [f"--two-stems={two_stems}"]

    # 입력 오디오 파일을 찾기
    files = [str(f) for f in find_files(inp)]
    if not files:
        print(f"No valid audio files in {in_path}")
        return

    # 실행할 명령어 출력
    print("Going to separate the files:")
    print('\n'.join(files))
    print("With command: ", " ".join(cmd))

    # 명령어 실행
    p = sp.Popen(cmd + files, stdout=sp.PIPE, stderr=sp.PIPE)
    copy_process_streams(p)
    p.wait()

    if p.returncode != 0:
        print("Command failed, something went wrong.")
        return

    # 보컬 파일에만 고역 차단 필터 적용 (20Hz 이하 제거)
    apply_high_pass_filter_to_vocals(outp, cutoff)
    print(f"Applied high-pass filter: cutoff={cutoff} Hz to vocals.")

# 파일 업로드 후 보컬과 배경음악 분리 및 보컬에만 20Hz 이하 소리 제거 실행
def from_upload_vocals_and_instrumental(cutoff=20):
    out_path = Path('separated')
    in_path = Path('tmp_in')

    if in_path.exists():
        rmtree(in_path)
    in_path.mkdir()

    if out_path.exists():
        rmtree(out_path)
    out_path.mkdir()

    uploaded = files.upload()
    for name, content in uploaded.items():
        (in_path / name).write_bytes(content)

    # 보컬과 인스트루멘탈 분리 및 보컬에만 20Hz 이하 소리 제거 실행
    separate_vocals_and_instrumental(in_path, out_path, cutoff)


In [9]:
# This can be quite slow, in particular the loading, and saving from GDrive. Please be patient!
# This is from google drive! Also, this will separate all the files inside the MyDrive/demucs folder,
# so when you are happy with the results, remove the songs from there.
# separate_vocals_and_instrumental()

In [13]:
separate_vocals_and_instrumental(cutoff=20)

Going to separate the files:
/content/drive/MyDrive/제목없는 폴더/키움_응원가_솔로1.wav
/content/drive/MyDrive/제목없는 폴더/키움_응원가_솔로2.wav
/content/drive/MyDrive/제목없는 폴더/한화_응원가_솔로1.wav
/content/drive/MyDrive/제목없는 폴더/SSG_응원가_솔로.wav
With command:  python3 -m demucs.separate -o /content/drive/MyDrive/제목없는 폴더 -n htdemucs --two-stems=vocals


Downloading: "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th" to /root/.cache/torch/hub/checkpoints/955717e8-8726e21a.th
100%|██████████| 80.2M/80.2M [00:00<00:00, 193MB/s]


Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in /content/drive/MyDrive/제목없는 폴더/htdemucs
Separating track /content/drive/MyDrive/제목없는 폴더/키움_응원가_솔로1.wav


100%|████████████████████████████████████████████████████████████████████████| 210.6/210.6 [00:13<00:00, 15.28seconds/s]


Separating track /content/drive/MyDrive/제목없는 폴더/키움_응원가_솔로2.wav


100%|██████████████████████████████████████████████| 193.04999999999998/193.04999999999998 [00:09<00:00, 19.86seconds/s]


Separating track /content/drive/MyDrive/제목없는 폴더/한화_응원가_솔로1.wav


100%|██████████████████████████████████████████████████████████████████████| 181.35/181.35 [00:09<00:00, 19.48seconds/s]


Separating track /content/drive/MyDrive/제목없는 폴더/SSG_응원가_솔로.wav


100%|████████████████████████████████████████████████████████████████████████| 117.0/117.0 [00:05<00:00, 19.66seconds/s]


Processing vocals file: /content/drive/MyDrive/제목없는 폴더/htdemucs/키움_응원가_솔로1/vocals.wav


AttributeError: 'PosixPath' object has no attribute 'read'

In [None]:
# # This is manual upload and download :)
# from_upload()
# !zip -r separated.zip separated
# files.download('./separated.zip')