### Implementation

Necessary imports:

In [1]:
import sklearn.decomposition as dec
import librosa
import soundfile as sf
import numpy as np

Functions to preprocess the data - cutting the sources to a common length and mixing them:

In [2]:
def mix_sound(sounds, mixture):
    """Return the weighted sum of the source signals.

    The i-th entry of `mixture` is the weight applied to `sounds[i]`. The
    accumulator has the length of `sounds[0]`, and only the first
    `len(mixture)` sources take part in the sum.
    """
    blended = np.zeros(sounds[0].shape[0])
    for index, weight in enumerate(mixture):
        blended += sounds[index] * weight
    return blended
        


def preprocess_data(sounds, mixtures):
    """Trim all sources to a common length and build one mixture per weight row.

    Args:
        sounds: sequence of 1-D arrays (the source signals); their lengths
            may differ.
        mixtures: sequence of weight rows. Each row is handed to `mix_sound`
            together with the trimmed sources.

    Returns:
        A list with one mixed signal per row of `mixtures`. The list is
        empty when `sounds` is empty.
    """
    if len(sounds) == 0:
        return []

    # Cut every source to the shortest one so they can be summed sample by sample.
    min_length = min(sound.shape[0] for sound in sounds)
    cut_sounds = [sound[:min_length] for sound in sounds]

    # Iterate over the weight rows themselves: the number of mixtures is
    # decided by `mixtures`, not by how many sources there are.
    return [mix_sound(cut_sounds, mixture) for mixture in mixtures]

Functions to center and whiten the data:

In [3]:
def center_data(X):
    """Subtract each signal's own mean from it.

    Iterates over `X` and returns a new list holding one centered array per
    item; the inputs themselves are left unchanged.
    """
    return [signal - np.mean(signal) for signal in X]

def whiten(X):
    """Whiten the rows of `X` so that their covariance becomes the identity.

    The transform is E * diag(1/sqrt(lambda)) * E^T, where E and lambda are
    the eigenvectors and eigenvalues of `np.cov(X)`.

    Args:
        X: 2-D array with one signal per row (a single row is accepted).

    Returns:
        Array of the same shape as `X` holding the whitened signals.
        Directions whose eigenvalue is numerically zero are mapped to zero
        instead of being amplified.
    """
    # np.cov returns a 0-d array for a single row; promote it to a 1x1 matrix.
    covar_of_sig = np.atleast_2d(np.cov(X))

    # The covariance is symmetric, so use eigh: it always yields real
    # eigenvalues and orthonormal eigenvectors, which the E * D * E^T
    # construction below relies on.
    eig_val, eig_vec = np.linalg.eigh(covar_of_sig)

    # Invert only eigenvalues that are safely positive. Tiny or slightly
    # negative values (rank-deficient input) would otherwise give inf/NaN.
    tol = np.finfo(eig_val.dtype).eps * eig_val.size * max(eig_val.max(), 0.0)
    inv_sqrt = np.zeros_like(eig_val)
    usable = eig_val > tol
    inv_sqrt[usable] = 1.0 / np.sqrt(eig_val[usable])

    # Scaling the columns of E by inv_sqrt equals E @ diag(inv_sqrt).
    whiten_trans = np.dot(eig_vec * inv_sqrt, eig_vec.T)
    whitened_sig = np.dot(whiten_trans, X)
    return whitened_sig

In [4]:
def f1(x):
    """Logistic sigmoid 1 / (1 + e^(-x)), evaluated element-wise through NumPy."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def f1_der(x):
    """Derivative of `f1` at `x`, computed as f1(x) * (1 - f1(x))."""
    sigmoid = f1(x)
    return sigmoid * (1 - sigmoid)

ICA function:

In [5]:
def ourICA(audio_sources, epsilon=1e-7, max_iter=1000):
    """Unmix signals with a one-component-at-a-time fixed-point ICA iteration.

    The mixtures are centered, stacked into a matrix (one row per mixture)
    and whitened. One unmixing vector is then estimated per row, using `f1`
    as the nonlinearity and `f1_der` as its derivative. Every new vector is
    orthogonalised against the vectors already found and renormalised.

    Args:
        audio_sources: sequence of equal-length 1-D signals (the mixtures).
        epsilon: convergence tolerance. A component is accepted once
            1 - |<v_previous, v_new>| <= epsilon.
        max_iter: upper bound on the number of fixed-point updates per
            component; a warning is issued if the bound is reached.

    Returns:
        2-D array with one estimated source per row, obtained by applying
        the stacked unmixing vectors to the whitened mixtures.
    """
    import warnings

    centered = center_data(audio_sources)
    sig_matrix = np.vstack(centered)
    whitened_signal_matr = whiten(sig_matrix)
    num_of_sources, length_of_track = whitened_signal_matr.shape

    comps_of_V = []
    for comp_index in range(num_of_sources):
        # Random unit-length starting direction.
        v = np.random.rand(num_of_sources)
        v = v / np.linalg.norm(v)

        converged = False
        for _ in range(max_iter):
            projection = np.dot(v, whitened_signal_matr)

            # Fixed-point update: E{x g(v^T x)} - E{g'(v^T x)} v
            first = np.dot(whitened_signal_matr, f1(projection)) / length_of_track
            second = np.mean(f1_der(projection)) * v
            v_new = first - second

            # Deflation: remove the parts lying along components already
            # found, so this one cannot converge to the same source again.
            for pres_comp in comps_of_V:
                v_new = v_new - np.dot(v_new, pres_comp) * pres_comp
            v_new = v_new / np.linalg.norm(v_new)

            # Direction is compared up to sign: v and -v describe the same component.
            converged = (1 - np.abs(np.dot(v, v_new))) <= epsilon
            v = v_new
            if converged:
                break

        if not converged:
            warnings.warn(
                f"ourICA: component {comp_index} did not converge within "
                f"{max_iter} iterations (epsilon={epsilon})"
            )

        # Keep the most recent iterate, not the one before it.
        comps_of_V.append(v)

    V = np.vstack(comps_of_V)
    S = np.dot(V, whitened_signal_matr)
    return S

### Application

Loading the data:

In [6]:
# Load the three source recordings; each result is unpacked as (signal, sample rate).
# NOTE(review): the rate of the first file lands in `sample_rate2`, while the
# second and third both assign `sample_rate1`. Only `sample_rate1` is used when
# the results are written, so confirm all three files share one rate.
source1, sample_rate2 = librosa.load("./data/input_data/hello.mp3")
source2, sample_rate1 = librosa.load("./data/input_data/tryvoha.mp3")
source3, sample_rate1 = librosa.load("./data/input_data/vidbiy.mp3")

Mixing sources:

In [7]:
# Three mixtures, each a differently weighted blend of the three sources.
original_sources = [source1, source2, source3]
mixing_weights = [
    [0.4, 0.3, 0.3],
    [0.3, 0.4, 0.3],
    [0.3, 0.3, 0.4],
]
mixed_sources = preprocess_data(original_sources, mixing_weights)

Unmixing using our implementation:

In [8]:
# Convergence tolerance handed to ourICA through its `epsilon` argument.
# NOTE(review): the inline note says "recommended over 1e-12" while the value
# set here is 1e-15 - confirm which threshold is actually intended.
EPS = 1e-15 # epsilon, recommended over 1e-12
our_sources = ourICA(mixed_sources, epsilon=EPS)

Unmixing using scikit-learn's built-in FastICA implementation:

In [9]:
ica_performer = dec.FastICA(n_components=len(mixed_sources))
# fit_transform takes one sample per row and one mixture per column.
# Transposing in NumPy avoids building a Python tuple for every audio sample.
mixtures_by_sample = np.vstack(mixed_sources).T
# Transpose back so that, like `our_sources`, each row is one recovered signal.
inbuilt_sources = ica_performer.fit_transform(mixtures_by_sample).T

Write results into files:

In [10]:
def _fit_to_unit_peak(signal):
    """Scale `signal` down so its largest absolute sample is 1.0; return it unchanged if it already fits."""
    signal = np.asarray(signal)
    if signal.size == 0:
        return signal
    peak = np.max(np.abs(signal))
    return signal / peak if peak > 1.0 else signal


# The separated signals come out of whitening/ICA on a unit-variance scale,
# not at the loudness of the recordings, so their peaks can exceed the
# +/-1.0 range that float audio is normally stored in. Bring such signals
# back into range before writing; signals already in range are untouched.
for i in range(len(mixed_sources)):
    sf.write(f"./data/output_data/source{i}mixed.mp3", _fit_to_unit_peak(mixed_sources[i]), sample_rate1)
for i in range(len(mixed_sources)):
    sf.write(f"./data/output_data/source{i}our.mp3", _fit_to_unit_peak(our_sources[i]), sample_rate1)
for i in range(len(mixed_sources)):
    sf.write(f"./data/output_data/source{i}inbuilt.mp3", _fit_to_unit_peak(inbuilt_sources[i]), sample_rate1)