In [1]:
import requests
import numpy as np
import soundfile as sf


def get_visualization_data(audio_file_path, timeout=60.0):
    """Build per-source visualization data for an audio file via a remote service.

    The file is read with ``soundfile``, the raw bytes of the sample array are
    POSTed to the separation endpoint, and the reply is decoded as five
    float32 rows of ``len(audio_data)`` values each.

    Args:
        audio_file_path: Path of the audio file to read.
        timeout: Seconds to wait for the service before giving up. ``requests``
            applies no timeout by default, so an unresponsive host would
            otherwise block the notebook indefinitely.

    Returns:
        dict mapping ``"0"`` .. ``"4"`` to ``{"color": [r, g, b], "data": [...]}``
        where ``data`` holds the absolute values of that source's row.

    Raises:
        requests.HTTPError: The service answered with an error status.
        ValueError: The reply does not contain exactly five rows of
            ``len(audio_data)`` float32 values.
    """
    # One RGB colour per source row, in the order the rows are read from the reply.
    source_colors = (
        (255, 0, 0),    # red for vocal
        (0, 255, 0),    # green for drums
        (0, 0, 255),    # blue for bass
        (255, 255, 0),  # yellow for other
        (255, 0, 255),  # magenta for background
    )
    num_sources = len(source_colors)

    # Load audio file (the sample rate is not needed by this function).
    audio_data, _sample_rate = sf.read(audio_file_path)
    num_frames = len(audio_data)

    # Send audio data to the separation service.
    # NOTE(review): the request body carries whatever dtype/shape sf.read
    # produced, while the reply is decoded as float32 — confirm the wire
    # format the service actually expects.
    url = "https://www.audiocontentanalysis.org/opunmix/parametric"
    headers = {"Content-Type": "application/octet-stream"}
    response = requests.post(
        url, headers=headers, data=audio_data.tobytes(), timeout=timeout
    )
    # Fail on HTTP errors before trying to decode an error page as mask data.
    response.raise_for_status()

    # Parse response, checking the payload size up front so a mismatch is
    # reported with the numbers needed to diagnose it.
    mask_data = np.frombuffer(response.content, dtype=np.float32)
    expected_values = num_sources * num_frames
    if mask_data.size != expected_values:
        raise ValueError(
            "expected {} float32 mask values ({} sources x {} frames), got {}".format(
                expected_values, num_sources, num_frames, mask_data.size
            )
        )
    mask_data = mask_data.reshape(num_sources, num_frames)

    # Construct custom data object; list(rgb) hands every caller a fresh list.
    return {
        str(index): {
            "color": list(rgb),
            "data": np.abs(mask_data[index]).tolist(),
        }
        for index, rgb in enumerate(source_colors)
    }


In [2]:
# NOTE: absolute, machine-specific path — point it at a local copy of the track.
audio_path = "/Users/marcowhite/Documents/vortex/source_separation/panorama.mp3"
vis_data = get_visualization_data(audio_path)

# Show a compact summary instead of the whole dict: every entry's "data" list
# holds one value per audio frame, so printing it all would flood the output.
for source_id, entry in vis_data.items():
    print(source_id, entry["color"], len(entry["data"]), entry["data"][:5])

SSLError: HTTPSConnectionPool(host='www.audiocontentanalysis.org', port=443): Max retries exceeded with url: /opunmix/parametric (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2396)')))

In [6]:
import os
import requests
from pydub import AudioSegment
from demucs.pretrained import load_pretrained

# Load the pretrained Demucs model and move it to the GPU.
# NOTE(review): the output captured for this cell shows that `load_pretrained`
# cannot be imported from `demucs.pretrained` in the installed package, so this
# line is never reached as written. `.cuda()` additionally presumes a CUDA
# device is available — confirm both before relying on this cell.
model = load_pretrained('demucs_extra').cuda()

# Define function to separate audio
def separate_audio(file_path):
    """Split an audio file into stems and write each stem next to the input.

    The input is decoded with pydub, exported to a temporary WAV file, passed
    to the module-level ``model``, and every returned stem is written to
    ``<input without extension>_stem<N>.wav``.

    Args:
        file_path: Path of the audio file to separate.

    Side effects:
        Writes one WAV file per stem beside the input and prints a completion
        message. The intermediate WAV is always removed, even on failure.
    """
    import tempfile  # local import: only this function needs it

    stem_base = os.path.splitext(file_path)[0]

    # Load audio using pydub
    audio = AudioSegment.from_file(file_path)

    # Stage the WAV in a private temp file rather than "<input stem>.wav":
    # when the input itself is a .wav, that name IS the input, so exporting
    # would overwrite it and the cleanup below would then delete it.
    temp_fd, wav_path = tempfile.mkstemp(suffix='.wav')
    os.close(temp_fd)
    try:
        audio.export(wav_path, format='wav')

        # NOTE(review): `separate_file` on the model and `to_file` on each stem
        # are taken from the original cell and are unverified against the
        # installed demucs — confirm.
        stems = model.separate_file(wav_path)

        # Export separated stems as WAV files
        for stem_idx, stem_audio in enumerate(stems):
            stem_audio.to_file('{}_stem{}.wav'.format(stem_base, stem_idx))
    finally:
        # Delete the intermediate WAV whether or not separation succeeded.
        if os.path.exists(wav_path):
            os.remove(wav_path)

    print('Separation complete.')

# Test the function with an example file
file_url = 'https://file-examples-com.github.io/uploads/2017/11/file_example_MP3_700KB.mp3'
file_name = os.path.basename(file_url)
file_path = os.path.join(os.getcwd(), file_name)

# Bound the wait (requests applies no timeout by default) and fail fast on an
# HTTP error so an error page is never saved under an .mp3 name.
response = requests.get(file_url, timeout=30)
response.raise_for_status()

with open(file_path, 'wb') as f:
    f.write(response.content)

separate_audio(file_path)


ImportError: cannot import name 'load_pretrained' from 'demucs.pretrained' (/Users/marcowhite/anaconda3/lib/python3.10/site-packages/demucs/pretrained.py)

In [7]:
import requests
from pydub import AudioSegment
import torch
import demucs

# Download the pretrained Demucs model
# NOTE(review): the output captured for this cell reports that the installed
# `demucs` module has no `download_pretrained` attribute, so execution stops here.
demucs.download_pretrained('demucs_extra')

# Load the pretrained Demucs model and move it to the GPU
# NOTE(review): not reached because of the failure above. `demucs.models.Demucs.load`
# is unverified against the installed package, and `.cuda()` presumes a CUDA
# device is available — confirm both.
model = demucs.models.Demucs.load('demucs_extra').cuda()

# Define a function to separate vocals from a given audio file
def separate_vocals(audio_file):
    """Run the module-level ``model`` on an audio file and return its first output.

    Args:
        audio_file: Anything ``AudioSegment.from_file`` accepts.

    Returns:
        A 16-bit, single-channel ``AudioSegment`` at the input's frame rate,
        built from ``sources[0]`` of the model output.
    """
    # Load the audio file using PyDub and force 16-bit PCM samples.
    audio = AudioSegment.from_file(audio_file)
    audio = audio.set_sample_width(2)

    # Put the input on the model's own device instead of assuming CUDA; fall
    # back to CUDA-if-available when the model exposes no parameters.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Build the tensor as float32 rather than the integer dtype that would be
    # inferred from the PCM samples, then reshape it to (1, num_samples).
    # NOTE(review): multi-channel input is passed as one flat sample stream and
    # the values keep their int16 scale — confirm the layout and scaling the
    # model expects.
    audio_tensor = torch.tensor(
        audio.get_array_of_samples(), dtype=torch.float32, device=device
    )
    audio_tensor = audio_tensor.reshape(1, -1)

    # Separate the vocals using the model
    with torch.no_grad():
        sources = model(audio_tensor)

    # Clamp to the int16 range before the cast so loud samples saturate
    # instead of wrapping around.
    pcm = sources[0].detach().cpu().clamp(-32768, 32767).to(torch.int16)

    # pydub builds a segment from raw bytes plus ALL of sample_width,
    # frame_rate and channels; the sample width (2 bytes) must be spelled out.
    vocals = AudioSegment(
        data=pcm.numpy().tobytes(),
        sample_width=2,
        frame_rate=audio.frame_rate,
        channels=1,
    )

    # Return the separated vocals as a PyDub audio segment
    return vocals


AttributeError: module 'demucs' has no attribute 'download_pretrained'

In [4]:
from urllib.request import urlopen
# Close the HTTP response deterministically and bound the wait; the bare last
# expression keeps the raw JSON bytes as the cell's displayed output.
with urlopen('https://www.howsmyssl.com/a/check', timeout=30) as tls_check:
    tls_report = tls_check.read()
tls_report

b'{"given_cipher_suites":["TLS_AES_256_GCM_SHA384","TLS_CHACHA20_POLY1305_SHA256","TLS_AES_128_GCM_SHA256","TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384","TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384","TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256","TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256","TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256","TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256","TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384","TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384","TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256","TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256","TLS_DHE_RSA_WITH_AES_256_GCM_SHA384","TLS_DHE_RSA_WITH_AES_128_GCM_SHA256","TLS_DHE_RSA_WITH_AES_256_CBC_SHA256","TLS_DHE_RSA_WITH_AES_128_CBC_SHA256","TLS_EMPTY_RENEGOTIATION_INFO_SCSV"],"ephemeral_keys_supported":true,"session_ticket_supported":true,"tls_compression_supported":false,"unknown_cipher_suite_supported":false,"beast_vuln":false,"able_to_detect_n_minus_one_splitting":false,"insecure_cipher_suites":{},"tls_version":"TLS 1.3","rating":"Probably Okay"}'

In [8]:
import openunmix

# Load the Open-Unmix model
model = openunmix.umxhq()

# Load the mixed audio file
# NOTE(review): the output captured for this cell reports that `openunmix` has
# no top-level `load_audio`, so execution stops at the call below and nothing
# after it runs. The path is also absolute and machine-specific.
audio_path = "/Users/marcowhite/Documents/vortex/source_separation/panorama.mp3"
audio, sr = openunmix.load_audio(audio_path)

# Separate the sources
# NOTE(review): the input layout the model expects is not shown here — confirm
# before passing the loaded audio straight in.
estimates = model(audio)

# Write the estimates to separate files
# NOTE(review): "/path/to/output" looks like a placeholder directory, and
# `openunmix.write_audio` is unverified against the installed package — confirm both.
for i, source in enumerate(estimates):
    output_path = f"/path/to/output/source_{i}.wav"
    openunmix.write_audio(output_path, source, sr)


AttributeError: module 'openunmix' has no attribute 'load_audio'