# CS280 Programming Assignment 4
__Cocktail Party Problem__<br>
<br>
Compiler: Python 3.6.5<br>
OS: Windows 7 64-bit

## 1. Load the audio files mic1.wav to mic5.wav found in the folder Audio_Data. 
These files are synchronized audio recordings captured by five microphones positioned at five different locations

In [1]:
import scipy.io.wavfile as wav
import os

data_path = './Audio_Data'

sampling_freq, mic1_data = wav.read(os.path.join(data_path, 'mic1.wav'))
sampling_freq, mic2_data = wav.read(os.path.join(data_path, 'mic2.wav'))
sampling_freq, mic3_data = wav.read(os.path.join(data_path, 'mic3.wav'))
sampling_freq, mic4_data = wav.read(os.path.join(data_path, 'mic4.wav'))
sampling_freq, mic5_data = wav.read(os.path.join(data_path, 'mic5.wav'))

## 2. Form the mixture matrix X from the input files

In [2]:
import numpy as np

X = np.array([mic1_data, mic2_data, mic3_data, mic4_data, mic5_data])
X = np.transpose(X)/X.std(axis=1)


In [3]:
X.shape

(191258, 5)

In [4]:
import sounddevice as sd
sd.play(0.1*X[:, 0], sampling_freq)

In [5]:
import matplotlib.pyplot as plt
import numpy as np

t = np.arange(0, len(X[:,0])/sampling_freq, 1/sampling_freq)
plt.plot(t, X[:,0])
plt.show()

<Figure size 640x480 with 1 Axes>

## 3. Invoke the appropriate ICA command
that "unmixes" the five independent components from the mixture of audio signals. Experiment on the ff:
* appropriate input sampling rate
* whether centering is required
* whether whitening is necessary
* appropriate contrastive function G(y)

First, let's define a function that plays the unmixed components which are outputted by FastICA:

In [6]:
import sounddevice as sd
import time

def play_independent_components(S, sampling_freq):
    print('========================================================================================')
    print('INFO: Turn your volume up to listen to the separated components! (Earphones recommended)')
    print('========================================================================================')
    S = np.transpose(S)
    for index, component in enumerate(S):
        print('Playing component %d:' % (index+1))
        sd.play(component, sampling_freq)
        time.sleep(len(component)/sampling_freq + 1)
    print('Done!')
    


Before trying anything out, let's check if our data is already centered:

In [7]:
means = np.mean(X, axis=0)
print('means: ', means)

means:  [-4.78217954e-08 -1.79852265e-07 -7.77710164e-08  4.03623969e-09
 -3.21318801e-08]


The means of the mixed sound files seem to be very close to zero. Therefore we can consider the data centered by default.

####  Try default settings
* centering: yes
* whitening: yes
* G(y): logcosh (default)

In [8]:
from sklearn.decomposition import FastICA

ica = FastICA(n_components=X.shape[-1])
S = ica.fit_transform(X)

In [9]:
play_independent_components(50*S, sampling_freq)

INFO: Turn your volume up to listen to the separated components! (Earphones recommended)
Playing component 1:
Playing component 2:
Playing component 3:
Playing component 4:
Playing component 5:
Done!


####  Try removing centering
* centering: no
* whitening: yes
* G(y): logcosh (default)

In [10]:
def whiten(X):
    X = np.transpose(X)
    Cx = np.cov(X)
    #print(Cx)
    V = 2*(1/np.sqrt(np.absolute(Cx)))
    #print(V.shape)
    #print(V)
    #print(X.shape)
    X_whitened = np.matmul(V, X)
    #print(np.transpose(X_whitened).shape)
    #print(X_whitened)
    return np.transpose(X_whitened)

In [11]:
X_not_centered = X - 10*np.random.uniform(low=-5, high=5, size=(1, X.shape[-1]))
print('means: ', np.mean(X_not_centered, axis=0))

X_not_centered = whiten(X_not_centered)
print(X_not_centered.shape)
ica = FastICA(n_components=X_not_centered.shape[-1], whiten=False)
# S = ica.fit_transform(X_not_centered)

means:  [-33.93942858  49.37727719   2.18800194 -23.48232819   1.54039754]
(191258, 5)


####  Try removing whitening
* centering: yes
* whitening: no
* G(y): logcosh (default)

In [12]:
from sklearn.decomposition import FastICA

ica = FastICA(n_components=X.shape[-1], whiten=False)
S = ica.fit_transform(X)



In [13]:
play_independent_components(S, sampling_freq)

INFO: Turn your volume up to listen to the separated components! (Earphones recommended)
Playing component 1:
Playing component 2:
Playing component 3:
Playing component 4:
Playing component 5:
Done!


####  Try varying G(y): logcosh
* centering: yes
* whitening: yes
* G(y): logcosh (default)

In [44]:
from sklearn.decomposition import FastICA

ica = FastICA(n_components=X.shape[-1])
S = ica.fit_transform(X)

In [45]:
from sklearn.metrics import mean_squared_error

def compute_residuals(X, X_recon):
    min_errors = np.zeros((X.shape[-1]))
    for i in range(X.shape[-1]):
        errors = np.array([5, 5, 5, 5, 5], dtype='float64')
        for j in range(X_recon.shape[-1]):
            errors[j] = mean_squared_error(X[:, i], X_recon[:, j])
        min_errors[i] = min(errors)
    return min_errors

In [46]:
A = ica.mixing_
X_recon = np.matmul(S, A)

residuals_logcosh = compute_residuals(X, X_recon)
print('Residuals for G(y)=logcosh: ', residuals_logcosh)

Residuals for G(y)=logcosh:  [0.99771003 0.35260233 0.61621657 1.58425705 0.93796394]


####  Try varying G(y): exp
* centering: yes
* whitening: yes
* G(y): exp

In [47]:
from sklearn.decomposition import FastICA

ica = FastICA(n_components=X.shape[-1], whiten=True, fun='exp')
S = ica.fit_transform(X)

In [48]:
A = ica.mixing_
X_recon = np.matmul(S, A)

residuals_exp = compute_residuals(X, X_recon)
print('Residuals for G(y)=exp: ', residuals_exp)

Residuals for G(y)=exp:  [0.96268755 0.59576485 0.88834541 0.82041206 0.76904431]


####  Try varying G(y): cube
* centering: yes
* whitening: yes
* G(y): cube

In [49]:
from sklearn.decomposition import FastICA

ica = FastICA(n_components=X.shape[-1], whiten=True, fun='cube')
S = ica.fit_transform(X)

In [50]:
A = ica.mixing_
X_recon = np.matmul(S, A)

residuals_cube = compute_residuals(X, X_recon)
print('Residuals for G(y)=cube: ', residuals_cube)

Residuals for G(y)=cube:  [0.4404183  0.70100829 0.23620003 0.84960115 0.30745145]


## 4. Save the independent components
as audio files in wav format. Label them as shat[1-5].wav

Best results when G(y) is cube:

In [51]:
from sklearn.decomposition import FastICA

ica = FastICA(n_components=X.shape[-1], whiten=True, fun='cube')
S = ica.fit_transform(X)

In [52]:
S.shape

(191258, 5)

In [55]:
import scipy.io.wavfile as wav

for i, unmixed in enumerate(np.transpose(S)):
    filename = 'shat%d.wav' % (i+1)
    wav.write(filename, sampling_freq, S[: , i])

## 5. Reconstruct the mixture signals
and measure the residuals for each one. Print out the residual values.

In [56]:
A = ica.mixing_
X_recon = np.matmul(S, A)

residuals_cube = compute_residuals(X, X_recon)
print('Residuals for G(y)=cube: ', residuals_cube)

Residuals for G(y)=cube:  [0.80859226 0.96807667 1.20942711 1.25663695 1.13143217]


## 6. Save the reconstructed mixture signals
as audio files in wav format. Label them as recon[1-5].wav

In [57]:
X_recon.shape

(191258, 5)

In [58]:
import scipy.io.wavfile as wav

for i, unmixed in enumerate(np.transpose(X_recon)):
    filename = 'recon%d.wav' % (i+1)
    wav.write(filename, sampling_freq, X_recon[: , i])