In [None]:
from sklearn.decomposition import NMF
from sklearn.cluster import SpectralClustering
import librosa
import IPython.display
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import scipy
import pandas as pd
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

import librosa.display

%matplotlib nbagg

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the conga recording: x1 holds the samples, fs the sample rate that is
# later handed to the audio player.
x1, fs = librosa.load(path='conga.wav')

In [None]:
# Audio player for the conga clip (rendered as the cell's output).
IPython.display.Audio(data=x1, rate=fs)

In [None]:
# Read the electric-guitar recording; note that fs is re-assigned here with
# the rate reported for this file.
x2, fs = librosa.load(path='electric_guitar.wav')

In [None]:
# Compare the lengths of the two clips before mixing them.
(x1.shape, x2.shape)

In [None]:
# Keep only as many guitar samples as the conga clip has so the two can be
# summed sample by sample.
# NOTE(review): assumes the guitar clip is at least as long as the conga
# clip; a shorter x2 would stay shorter -- confirm.
x2 = x2[:len(x1)]
IPython.display.Audio(data=x2, rate=fs)

In [None]:
# Mixture to be separated: conga weighted 0.4 plus guitar weighted 0.2.
x3 = x1 * 0.4 + x2 * 0.2
IPython.display.Audio(data=x3, rate=fs)

In [None]:
# Short-time Fourier transform of the mixture. S keeps the complex values
# (its phase is reused when the components are resynthesised); X is the
# magnitude that is factorised by NMF.
S = librosa.stft(x3)
X = np.absolute(S)

# Spectrogram of the mixture in dB on a log-frequency axis. The magnitude X
# is passed instead of the complex S: amplitude_to_db operates on amplitudes
# and warns that phase is discarded when it receives complex input.
plt.figure()
logX = librosa.amplitude_to_db(X)
librosa.display.specshow(logX, sr=fs, x_axis='time', y_axis='log')

## NMF

In [None]:
def logSRR(S, R):
    """Return the ratio of the RMS of ``S`` to the RMS of ``R`` in decibels.

    Computes ``20 * log10(rms(S) / rms(R))``, where ``rms`` is the square
    root of the mean of the squared entries taken over the whole array.
    The base-10 logarithm is used because the factor 20 is the decibel
    convention for amplitude ratios; a natural logarithm would scale every
    value by ln(10).

    Parameters
    ----------
    S : array-like of real numbers
        Numerator of the ratio.
    R : array-like of real numbers
        Denominator of the ratio.

    Returns
    -------
    float
        The RMS ratio expressed in dB. 0.0 when both inputs have the same
        RMS.
    """
    rms_s = np.sqrt(np.mean(np.square(S)))
    rms_r = np.sqrt(np.mean(np.square(R)))
    return 20 * np.log10(rms_s / rms_r)

In [None]:
# Sweep the number of NMF components from 2 to 20 and record logSRR between
# the magnitude spectrogram X and its reconstruction for each rank.
# NOTE(review): logSRR receives the reconstruction itself, not the residual
# X - reconstruction -- confirm that this is the intended measure.
scree_rows = []

for i in range(2, 21):
    # Fixed seed (the same value the final model uses) so the curve is
    # reproducible across kernel restarts.
    nmf_i = NMF(n_components=i, solver='cd', verbose=0, random_state=123)
    # Loop-local factor names keep the sweep from overwriting the W / H of
    # the final decomposition.
    W_i, H_i = librosa.decompose.decompose(X,
                                           n_components=i,
                                           transformer=nmf_i,
                                           sort=True)
    scree_rows.append((i, logSRR(X, W_i @ H_i)))

scree_plot_data = pd.DataFrame(scree_rows, columns=['n_components', 'logSRR'])

In [None]:
# Scree plot: logSRR as a function of the number of NMF components.
plt.figure()
sns.lineplot(data=scree_plot_data, x='n_components', y='logSRR')

In [None]:
# Final model: five components, coordinate-descent solver, fixed seed.
n_components = 5
nmf_decomposer = NMF(
    n_components=n_components,
    solver='cd',
    verbose=0,
    random_state=123,
)

In [None]:
# Factorise the magnitude spectrogram with the configured NMF transformer.
# Later cells plot the columns of W and the rows of H per component.
W, H = librosa.decompose.decompose(
    X,
    n_components=n_components,
    transformer=nmf_decomposer,
    sort=True,
)

In [None]:
# Shapes of the spectrogram and of its two factors.
(X.shape, W.shape, H.shape)

In [None]:
# One panel per component showing column n of W on a log10 scale, laid out
# in a two-column grid.
plt.figure(figsize=(8, 8))
logW = np.log10(W)
# Integer ceiling of n_components / 2. plt.subplot needs an integer row
# count, whereas np.ceil returns a float.
n_rows = (n_components + 1) // 2
for n in range(n_components):
    plt.subplot(n_rows, 2, n + 1)
    plt.plot(logW[:, n])
    # Shared y-range across panels: floor at -2, top at the global maximum.
    plt.ylim(-2, logW.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)

In [None]:
# One stacked panel per component showing row n of H.
plt.figure(figsize=(8, 10))
for n in range(n_components):
    # n_components is already an integer; wrapping it in np.ceil produced a
    # float row count, which plt.subplot does not accept.
    plt.subplot(n_components, 1, n + 1)
    plt.plot(H[n])
    # Shared y-range across panels so the components are comparable.
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)

In [None]:
# Turn every NMF component back into audio: the rank-1 magnitude
# outer(W[:, n], H[n]) is combined with the phase of the mixture STFT and
# inverted with the inverse STFT. The component waveforms are collected in
# `components` and summed into `reconstructed_signal`.
#
# np.zeros / np.outer replace scipy.zeros / scipy.outer, which were
# deprecated re-exports of the same NumPy functions.
n_samples = len(x3)
reconstructed_signal = np.zeros(n_samples)
components = []
# Unit-magnitude phase factor of the mixture; identical for every component,
# so it is computed once outside the loop.
mixture_phase = np.exp(1j * np.angle(S))
for n in range(n_components):
    Y = np.outer(W[:, n], H[n]) * mixture_phase
    # length= pins the output to the mixture length so every component lines
    # up with the accumulator and with the other components.
    y = librosa.istft(Y, length=n_samples)
    components.append(y)
    reconstructed_signal += y
    IPython.display.display(IPython.display.Audio(y, rate=fs))

### Reconstructed Signal

In [None]:
# Player for the sum of all resynthesised components.
IPython.display.display(
    IPython.display.Audio(data=reconstructed_signal, rate=fs)
)

### Conga Source

In [None]:
# Components 0 and 2 summed -- the pair this section presents as the conga.
IPython.display.Audio(data=components[0] + components[2], rate=fs)

### Electric Guitar Source

In [None]:
# Components 1 and 3 summed -- the pair this section presents as the guitar.
IPython.display.Audio(data=components[1] + components[3], rate=fs)

### Noisy Component

In [None]:
# Component 4 on its own -- presented by this section as the noisy leftover.
IPython.display.Audio(data=components[4], rate=fs)

In [None]:
# Pairwise cosine similarity between the component waveforms, shown as an
# image. A fresh figure is opened first (as every other plotting cell does)
# so the image does not land on the axes of a previously drawn figure.
plt.figure()
imshow(cosine_similarity(components))