In [1]:
import bisect
import librosa
import numpy as np
from numpy.linalg import norm
from numpy.linalg import svd 
from scipy.sparse.linalg import svds
import scipy.io.wavfile as wavfile

In [3]:
def load_data(path, filename, sampling_rate=16000, mix=False):
    '''
    Function to load the '.wav' data
    @param path: str, where data is read from
    @param filename: str, file name, should end in '.wav'
    @param sampling rate: int, default is 16000. 22,050 is also a common value but not applicable to this project
    @param mix: bool, if True, then the left sound track and right sound track are mixed to a mono track signal
    '''
    song = librosa.load(path + filename, sr = sampling_rate, mono=mix)[0]
    if mix:
        return song
    else:
        return song[0], song[1]
    
def save_data(path, filename, data, sampling_rate=16000):
    wavfile.write(path+filename, rate=sampling_rate, data=data)

In [35]:
import rpca
def separate_signal_with_RPCA(M, improve=False):
    #Short-Time Fourier Transformation
    M_stft = librosa.stft(M, n_fft=1024, hop_length=256)
    # Get magnitude and phase
    M_mag, M_phase = librosa.magphase(M_stft)
    # RPCA
    L_hat, S_hat, r_hat, n_iter_hat = rpca.pcp_alm(M_mag)
    # Append phase back to result
    L_output = np.multiply(L_hat, M_phase)
    S_output = np.multiply(S_hat, M_phase)
    
    if improve:
        L_hat, S_hat, r_hat, n_iter_hat = rpca.pcp_alm(np.abs(S_output))
        S_output = np.multiply(S_hat, M_phase)
        L_output = M_stft - S_output

    return M_stft, L_output, S_output

def get_LS_istft(M):
    M_stft = librosa.stft(M, n_fft=1024, hop_length=256)
    
    M_mag, M_phase = librosa.magphase(M_stft)
    L_hat, S_hat, r_hat, n_iter_hat = pcp_alm(M_mag)
    
    # Append phase back to result
    L_output = np.multiply(L_hat, M_phase)
    S_output = np.multiply(S_hat, M_phase)
    
    # Inverse Short-Time Fourier Transformation
    #L_istft = librosa.istft(L_output, hop_length=256)
    #S_istft = librosa.istft(S_output, hop_length=256)
    return M_stft, L_output, S_output

In [12]:
def time_freq_masking(M_stft, L_hat, S_hat, gain):
    mask = np.abs(S_hat)- gain* np.abs(L_hat)
    print(mask.shape)
    mask = (mask>0) * 1
    X_sing = np.multiply(mask, M_stft)
    X_music = np.multiply(1-mask, M_stft)
    return X_sing, X_music

# Example
data_path = '../data/MIR-1K_for_MIREX/Wavfile/'
output_path = '../output/'
filename = 'abjones_1_01.wav'
music, voice = load_data(data_path, filename)

# Short-Time Fourier Transformation
music_stft = librosa.stft(music)
voice_stft = librosa.stft(voice)

In [13]:
# Example MIXED TO MONO
data_path = '../data/MIR-1K_for_MIREX/Wavfile/'
output_path = '../output/'
filename = 'davidson_2_01.wav'
M = load_data(data_path, filename, mix=True)

In [21]:
save_data(output_path, filename+'_mix.wav', data=M)

In [14]:
import time

In [16]:
# Short-Time Fourier Transformation
start = time.time()
M_stft, L_stft, S_stft = separate_signal_with_RPCA(M)
end = time.time()
print(start-end)

frob_norm 714.333
two_norm 526.929
one_norm 94893.4
info_norm 38.3415
gamma 0.0441510785688
-60.45458221435547


In [31]:
X_sing, X_music = time_freq_masking(M_stft, L_stft, S_stft, 1.5)

(513, 479)


In [32]:
X_sing_istft = librosa.istft(X_sing, hop_length=256)

In [33]:
X_music_istft = librosa.istft(X_music, hop_length=256)

In [29]:
save_data(output_path, filename+'_mask1_S_output.wav', data=X_sing_istft)

In [20]:
save_data(output_path, filename+'_mask_S_output.wav', data=X_sing_istft)

In [34]:
save_data(output_path, filename+'_mask_L15_output.wav', data=X_music_istft)

In [None]:
###test###

import numpy as np

n = 100
r = 3
np.random.seed(123)
base = 100 + np.cumsum(np.random.randn(n, r), axis=0)
scales = np.abs(np.random.randn(n, r))
L = np.dot(base, scales.T)
S = np.round(0.25 * np.random.randn(n, n))
M = L + S

L_hat, S_hat, r, n_iter = pcp_alm(M,500)
print(np.max(np.abs(S - S_hat)))
print(np.max(np.abs(L - L_hat)))
print(n_iter)



_, s, _ = np.linalg.svd(L, full_matrices=False)
%print (s[s > 1e-11])

_, s_hat, _ = np.linalg.svd(L_hat, full_matrices=False)
%print (s_hat[s_hat > 1e-11])

#### Reference
Code Reference: https://github.com/apapanico/sklearn-rpca/blob/master/sklearnrpca/rpca_alm.py 
Modified based on the alternativate direction algorithm presented in the original paper: https://statweb.stanford.edu/~candes/papers/RobustPCA.pdf