In [1]:
import numpy as np
from scipy.optimize import minimize
import librosa




def get_feature_audio(filename, sr=8192):
    """Load an audio file and return its element-wise squared signal.

    Parameters
    ----------
    filename : str
        Path of the audio file, handed to ``librosa.load``.
    sr : int, optional
        Sample rate requested from ``librosa.load``. Defaults to 8192,
        the value this notebook originally hard-coded.

    Returns
    -------
    The mono signal returned by ``librosa.load`` raised to the power 2.0
    (one value per sample).
    """
    # librosa.load also returns the sample rate; it is not needed here.
    y, _ = librosa.load(filename, mono=True, sr=sr)
    return y ** 2.0


def linear_model(x, A, y):
    """Objective for the mixing fit: the 2-norm of ``A . x - y``.

    Parameters
    ----------
    x : array
        Candidate weights.
    A : array
        Matrix multiplied by ``x`` via ``np.dot``.
    y : array
        Target that the product ``A . x`` is compared against.

    Returns
    -------
    ``np.linalg.norm`` (``ord=2``) of the residual.
    """
    prediction = np.dot(A, x)
    residual = prediction - y
    return np.linalg.norm(residual, ord=2)



In [2]:
import os
import sox
import tempfile

def hex_to_stem_list(hex_file, n_channels=6):
    """Split a multi-channel file into one single-channel WAV per channel.

    For each ``k`` in ``0 .. n_channels - 1`` a ``sox.Transformer`` remixes
    input channel ``k + 1`` into output channel 1 and writes the result to
    ``<tempdir>/<k>.wav``.

    Parameters
    ----------
    hex_file : str
        Path of the multi-channel input file.
    n_channels : int, optional
        Number of input channels to extract. Defaults to 6, the count this
        function originally hard-coded.

    Returns
    -------
    list of str
        Output paths, always ordered by channel (``0.wav`` first). The
        ordering no longer depends on dict iteration order, which is not
        guaranteed on Python versions before 3.7.

    Raises
    ------
    ValueError
        If ``n_channels`` is smaller than 1.

    Notes
    -----
    The files live in a fresh ``tempfile.mkdtemp()`` directory that this
    function does not remove; the caller owns its cleanup.
    """
    if n_channels < 1:
        raise ValueError('n_channels must be >= 1, got {}'.format(n_channels))

    temp_dir = tempfile.mkdtemp()

    stem_files = []
    for channel in range(n_channels):
        tfm = sox.Transformer()
        # Output channel 1 is fed from input channel (channel + 1) only.
        tfm.remix(remix_dictionary={1: [channel + 1]})
        output_path = os.path.join(temp_dir, '{}.wav'.format(channel))
        stem_files.append(output_path)
        tfm.build(hex_file, output_path)
    return stem_files

In [3]:
# Root of the dataset. Set the DATASET_DIR environment variable to point at
# your own copy; the default is the location this notebook was written against.
DATA_DIR = os.environ.get('DATASET_DIR', '/Users/tom/Music/DataSet')
TRACK = 'eh_BN1-129-Eb'

# Reference microphone mix and the matching multi-channel (hex) recording.
mix_file = os.path.join(DATA_DIR, 'test_set_ref', TRACK + '_mic_comp.wav')
hex_file = os.path.join(DATA_DIR, 'test_set_cleaned2', TRACK + '_hex_comp_cleaned.wav')

# One temporary single-channel WAV per channel of the hex recording.
stem_files = hex_to_stem_list(hex_file)

In [4]:

def analyze_mix_audio(mix_file, stem_files):
    """Estimate the weight of each stem in a mix.

    The features of the mix and of every stem are obtained with
    ``get_feature_audio``. Weights ``x`` are then fitted by minimizing
    ``linear_model(x, A, y)``, where column ``i`` of ``A`` holds the features
    of stem ``i`` and ``y`` holds the features of the mix.

    Parameters
    ----------
    mix_file : str
        Path of the mixed recording.
    stem_files : sequence of str
        Paths of the individual stems.

    Returns
    -------
    dict
        Maps the stem's position in ``stem_files`` (int) to its fitted
        weight (float). Empty when ``stem_files`` is empty.

    Notes
    -----
    Every weight is bounded to ``[0.01, 100.0]``. If the signals differ in
    length, all of them are truncated to the shortest one so the linear
    system is well-formed. A ``UserWarning`` is emitted when the optimizer
    reports that it did not converge; the last iterate is still returned.
    """
    mix_audio = get_feature_audio(mix_file)
    stem_features = [get_feature_audio(path) for path in stem_files]
    if not stem_features:
        return {}

    # Align lengths: files loaded independently may differ by a few samples,
    # which would otherwise give a ragged array or a shape mismatch.
    n_samples = min([len(mix_audio)] + [len(f) for f in stem_features])
    design = np.stack([f[:n_samples] for f in stem_features], axis=1)
    target = mix_audio[:n_samples]

    n_stems = design.shape[1]
    # force weights to be between 0.01 and 100
    bounds = tuple((0.01, 100.0) for _ in range(n_stems))
    res = minimize(
        linear_model, x0=np.ones((n_stems,)), args=(design, target),
        bounds=bounds
    )
    if not res.success:
        import warnings
        warnings.warn(
            'mixing-coefficient fit did not converge: {}'.format(res.message)
        )

    return {index: float(coef) for index, coef in enumerate(res.x)}


In [5]:
# Fit one weight per stem (split from hex_file) against the features of mix_file.
mixing_coeffs = analyze_mix_audio(mix_file, stem_files)

In [6]:
# Display the fitted weights (stem index -> coefficient).
# NOTE(review): in the recorded output stem 5 is roughly 10x larger than the
# other weights -- confirm the optimizer converged before relying on it.
mixing_coeffs

{0: 4.686577773164704,
 1: 4.117686649927694,
 2: 1.1625930328961953,
 3: 3.8748405182139765,
 4: 2.9273201881978563,
 5: 35.75451270914055}