# TMA4320 - Mal for kode til Prosjekt 1
Dette er et forslag til hvordan du kan strukturere python-kode for å gjøre uavhengig komponentanalyse på lydsignaler.

## Forberedelser
Først må vi importere data. Vi antar i koden nedenfor at lydfilene med miksede signaler ligger på audio/mix_1.wav,
audio/mix_2.wav, audio/mix_3.wav. Du skal i arkivet Prosjekt1.zip kunne finne disse filene samt wav_file_loader.py som blir importert i koden nedenfor.


In [156]:
"""I denne cella laster vi opp tre miksede lydklipp som vi skal bruke å teste ut algoritmen på"""
import numpy as np
from wav_file_loader import read_wavefiles

paths = ['audio/mix_1.wav', 'audio/mix_2.wav', 'audio/mix_3.wav']
data, sampling_rate = read_wavefiles(paths)
num_signals = data.shape[0]

(3, 50000)


In [159]:
"""I denne cellen fins en funksjon som normaliserer lydsignalenes volum, slik at de får omtrent samme lydstyrke"""
def normalize_audio(data):
    """Scale amplitude s.t. max(data[i]) == 1."""
    abs_data = np.absolute(data)
    maximums = np.amax(abs_data,1)
    # Divide each row by a different vector element:
    data = data / maximums.reshape((3,1))
    return data

data = normalize_audio(data)
print(data.shape)
print(data)

(3, 50000)
[[ 0.02745098  0.35686275  0.28627451 ...  0.23137255  0.27843137
   0.36470588]
 [-0.06666667  0.29411765  0.2627451  ...  0.23921569  0.28627451
   0.36470588]
 [-0.50588235  0.15294118  0.18431373 ...  0.28627451  0.34901961
   0.45098039]]


In [119]:
"""Her kan du spille av de tre opplastede lydklippene"""
import IPython.display as ipd

ipd.display(ipd.Audio(data[0,:], rate=sampling_rate))
ipd.display(ipd.Audio(data[1,:], rate=sampling_rate))
ipd.display(ipd.Audio(data[2,:], rate=sampling_rate))

## Miksing
Denne delen om miksing, inkludert kodecellen nedenfor kan du foreløpig ignorere. Men dersom du seinere vil teste ut algoritmen ved å selv blande opplastede uavhengige signaler så kan funksjonene nedenfor komme til nytte.

In [120]:
def normalize_rowsums(A):
    """Divide each row in A by its sum.
    
    The sum of each row in the result is 1.0."""
    the_sum = np.sum(A,1)
    A = A / the_sum.reshape((3,1))
    return A

def random_mixing_matrix(signals, observations):
    """ Creates a random matrix
    
    Each element is a small positive number, not too close to 0.
    (1/11, 5/7).
    """
    A = 0.25 + np.random.rand(observations, signals)
    return normalize_rowsums(A)


In [121]:
A = random_mixing_matrix(num_signals, num_signals)
data_mixed = normalize_audio(A @ data)

In [122]:
import IPython.display as ipd

ipd.display(ipd.Audio(data_mixed[0,:], rate=sampling_rate))
ipd.display(ipd.Audio(data_mixed[1,:], rate=sampling_rate))
ipd.display(ipd.Audio(data_mixed[2,:], rate=sampling_rate))

## Preprosessering
I den etterfølgende cellen skal du skrive funksjonene <font color='blue'> center_rows </font> og <font color='blue'> whiten_rows </font> for å gjøre de preprosesseringsstegene som er omtalt i prosjektbeskrivelsen. I hvert tilfelle er variabelen Z et array av dimensjon $d\times N$ av miksede signaler.

In [165]:
def center_rows(Z):
    """Ensures each row has zero mean.
    
    Takes a matrix of arbitrary shape and subtracts from each row the mean value of that row."""
    
    # Here goes your code. The code should return a dxN-matrix, say Zc, where each row has zero mean
    row_means = np.mean(Z, axis=1)
    Z_transposed = Z.transpose()
    Zc_transposed = Z_transposed - row_means
    Zc = Zc_transposed.transpose()
    return Zc #, mus

#This function require square matrix, but should not
def whiten_rows(Z):
    """Return whitened version of Z and the matrix for the transform, say Zw, T, where Zw=T*Z
    """
    # Your code goes here.
    # Hints: The covariance matrix can be obtained by the function cov in numpy, call it C.
    C = np.cov(Z)
    print(C.shape)
    # The following two statements compute T (inverse square root of C).
    U, S, _ = np.linalg.svd(C, full_matrices=False)
    T  = U @ np.diag(1 / np.sqrt(S)) @ U.T
    
    
    
    Zw = T*Z
    return Zw, T

In [166]:
#Test above functions

#Test of center_rows:
Z = np.array([[5,2,7,4],[5,2,2,1],[7,8,0,6]])
Zc = center_rows(Z)
#print(np.mean(Zc, axis=1))

#Test of whiten_rows:
Zwc, T = whiten_rows(Zc)
print(Zwc)
print('')
print(T)

(3, 3)


ValueError: operands could not be broadcast together with shapes (3,4) (3,3) 

## Hovediterasjonen - maksimering av ikke-gaussiskhet

In [125]:
def normalize_rownorms(Z):
    """Divide each row in A by its Euclidean norm.
    
    The norm of each row in the output is equal to one.
    
    Your code goes under here. You need to compute the Euclidean norm of each row of the matrix Z
    and then scale each row by this norm. 
    """
    e_norms = np.linalg.norm(Z, axis=1)
    Z_trans = Z.transpose()
    Z_norm_trans = Z_trans / e_norms
    Z_norm = Z_norm_trans.transpose()
    return Z_norm

In [126]:
#Test og normalize_rownorms:
Z_norm = normalize_rownorms(Z)
print(Z_norm)
print(np.linalg.norm(Z_norm, axis=1))

[[0.62217102 0.41478068 0.51847585 0.41478068]
 [0.51571062 0.20628425 0.72199487 0.4125685 ]
 [0.85749293 0.34299717 0.34299717 0.17149859]
 [0.57346234 0.65538554 0.         0.49153915]]
[1. 1. 1. 1.]


In [127]:
def decorrelate_weights(W):
    """ This is the orthogonalization step (or decorrelation step) The dxd input matrix W is projected onto an 
    orthogonal matrix by the transformation Wd = (WW^T)^{-1/2} W as described in the note. The single output 
    argument is the projected W-matrix (Wd)
    Hint: Use a similar technque for the inverse square root as in the whitening step
    
    Your code goes here      """
    WW_T = W*W.transpose()
    U, S, _ = np.linalg.svd(WW_T, full_matrices=False)
    WW_T_inv_sqrt = U @ np.diag(1 / np.sqrt(S)) @ U.T
    Wd = WW_T_inv_sqrt * W

    return Wd



In [128]:
#Test of decorrelate_weights:

Wd = decorrelate_weights(Z)
C = np.cov(Wd)
print(Wd)
print('')
print(C)

[[ 1.39554988 -0.37560964 -0.38426563 -0.12115337]
 [-0.46951205  0.52190894  0.38532367 -0.14052944]
 [-0.38426563  0.11009248  0.52878838 -0.02683293]
 [-0.2120184  -0.28105889 -0.          1.04596275]]

[[ 0.72826575 -0.34017677 -0.26895255 -0.12938452]
 [-0.34017677  0.21299091  0.14282579 -0.07840289]
 [-0.26895255  0.14282579  0.14238213 -0.00300724]
 [-0.12938452 -0.07840289 -0.00300724  0.38052113]]


In [129]:
def update_W(W, Zcw):
    """Calculates W_k+1 from W_k.
    So the input is W=W_k (d x d) as well as the centered, whitened data Zcw (dxN known as tilde{x} in the note)
    Output is the new W (W_{k+1}).
    
    This function does the two iteration steps in the note: The optimisation step and the 
    orthogonalisation (decorrelation) step. The first step you need to code, the orthogonalisation is already
    provided by the function decorrelate_weights that needs to be called.
    You can use the kurtosis version, i.e. G(u)=4*(u**3) and its derivative. Don't include the while-loop in 
    this function
    """
    Wd = decorrelate_weights(W)
    s_k = (Wd * Zcw)
    
    #kurtosis and derivative as lambda funtions
    kurtosis = lambda u: 4*(u**3)
    kurtosis_d = lambda u: 12*(u**2)
    
    #vecorizing functions
    vec_kurtosis = np.vectorize(kurtosis)
    vec_kurtosis_d = np.vectorize(kurtosis_d)
    
    G = vec_kurtosis(s_k)
    G_d = vec_kurtosis_d(G)
    
    M,N = G_d.shape
    
    W_p = (1/N)*G*Zcw.transpose() - np.diag(np.linalg.norm(G_d, axis=1))*Wd
    W_pn = normalize_rownorms(W_p)
    W_pnd = decorrelate_weights(W_pn)
    
    '''
    Bunch of debug prints
    print(W_pnd)
    print(M)
    print(N)
    print(s_k)
    print('')
    print(G)
    print('')
    print(G_d) #debug print
    '''
    return W_pnd
    

In [130]:
G_d = update_W(Z, Zwc)
print(Z.shape)
print(Zwc.shape)

print('')
print(G_d)


[[-1.00000000e+00  5.05000433e-80 -1.62538779e-81  9.16120771e-87]
 [ 2.98002313e-72 -1.00000000e+00  1.18131229e-32 -2.09032557e-41]
 [ 3.70297869e-75  1.66618524e-39 -1.00000000e+00 -1.88859669e-43]
 [ 1.25138132e-71 -3.07890280e-33  0.00000000e+00 -1.00000000e+00]]


In [131]:
def measure_of_convergence(W1, W2):
    """This function computes an error estimate for the maximisation iteration, it computes the convergence
    criterion given in the note. 
    Input: W1 is the previous iterate, and W2 is the one just computed.
    Output: The quantity delta defined in the note.
    Typical numpy-functions to use: numpy.sum, numpy.absolute, numpy.amax.
    
    Your code goes here:
    """
    a_s = np.absolute(np.sum(W2*W1, axis=None))
    delta = np.amax((1-a_s), axis=None)
    return delta

In [132]:
W1 = np.array([[4,5,9],[8,2,5],[2,2,1]])
W2 = np.array([[6,3,7], [9,3,6], [5,3,1]])
print(measure_of_convergence(W1, W2))

-226


In [150]:
import warnings


def fast_ICA(Z, signals_to_find, tol=1e-10, max_iter=100):
    """ This is the function that organises all the work.
    
    Input: Z is the unprocessed data
           signals_to_find: in our case, always d the number of sources
           tol is the tolerance, default value 1.0e-10
           max_iter abort after max_iter iterations if not converged, (to avoid infinite loop)
    Output: Z_ica, the separated signals (dxN matrix, approximating the sources)
            W The final converged W-matrix (dxd)
            Also some other variables of interest can be returned if desired
    """
    # center the rows of Z
    Z_cent = center_rows(Z)

    # whiten the centered rows
    Z_cent_wit, T = whiten_rows(Z_cent)
    
    # Put W_0 = W to a random initial value and normalise the rows to length 1
    M, N = Z_cent.shape
    W_0 = np.random.rand(M, N)
    
    # Initialise some variables to prepare for the while-loop (such as delta)
    delta = tol + 1
    number_of_iter = 0
    
    # while delta>tol and number_of_iterations < max_iter:
    #      do an iteration to get a new W-iterate 
    #      Compute the error estimate to update delta
    while delta > tol and number_of_iter < max_iter:
        W_p = update_W(W_0, Z_cent_wit)
        W_p = Z
        delta = np.abs(measure_of_convergence(W_0, W_p))
        W_0 = W_p
        number_of_iter += 1
    
    # Clean up, check if converged or max_iter attained
    if number_of_iter == max_iter:
        print(':(((')
        
    else:
        print(delta)
        
    return W_p
    


In [160]:
fast_ICA(data, 3)

ValueError: operands could not be broadcast together with shapes (3,3) (3,50000) 