### Initialization

The goal of this notebook is to create mixtures of random-variable distributions. According to the ICA model, X = AS, where A is the mixing matrix, S holds the true sources and X is the observed mixture. (The code below stores one sample per row, so it computes the equivalent X = S·Aᵀ.) All of these matrices are saved here for later analysis.

In [2]:
from __future__ import division
import numpy as np
import scipy.io
from scipy import signal
import sys

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
def is_invertible(a):
    """Tell whether `a` is a square matrix of full rank.

    The first two dimensions of `a` must be equal and the rank reported by
    `np.linalg.matrix_rank` must match that dimension.
    """
    n_rows, n_cols = a.shape[0], a.shape[1]
    if n_rows != n_cols:
        # A non-square matrix can never be inverted.
        return False
    return np.linalg.matrix_rank(a) == n_rows

def get_mixture_mat(N=3):
    """Draw a random full-rank N x N mixing matrix.

    Entries are `np.random.random_sample` draws shifted by +1. Candidates are
    redrawn from the global NumPy random state until one has rank N, so the
    returned matrix is always invertible.
    """
    candidate = np.random.random_sample((N, N)) + 1
    # Keep drawing until the candidate is full rank (i.e. invertible).
    while np.linalg.matrix_rank(candidate) != candidate.shape[0]:
        candidate = np.random.random_sample((N, N)) + 1
    return candidate

### Creating distributions

The task involves creating 9 distributions of different types, with a different number of sources and with varied sample sizes (100, 1000 and 100k points). We should have at least one case with:

- Temporal structure (such as sine, cosine). (only FOBI should work)
- Nonlinear mixture of sources. (only non-linear ICA should perform fairly)
- Noise. (so we can compare how ICA algorithms perform in this scenario)
- Many sources, at least 7 (Jade should not perform well in this case, according to the theory)
- Ill-conditioned mixture matrix. 
- Gaussian distribution (ICA won't like this)

#### #01 - Laplace + Uniform

Expectation: separation should occur without problems.

In [4]:
np.random.seed(1)

# Mixing matrix A (2 x 2), shared by every sample size of this case.
A = get_mixture_mat(N=2)
scipy.io.savemat('A_01.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: one Laplace column and one uniform column.
    s2 = np.random.laplace(0.5, 1, n_points)
    s3 = np.random.uniform(-1, 1, n_points)

    S = np.c_[s2, s3]
    # Save the sources per sample size. A single 'S_01.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_01_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 2))
    scipy.io.savemat('X_01_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_01.mat', mdict={'S': S})

#### #02 - Laplace + Uniform (A ill-conditioned)

Expectation: don't really know.

In [24]:
np.random.seed(2)

# Mixing matrix A (2 x 2), shared by every sample size of this case.
A = get_mixture_mat(N=2)
# Shrink one entry in an attempt to make A ill-conditioned.
# NOTE(review): replacing a single entry with 1e-2 does not by itself
# guarantee a large condition number -- confirm with np.linalg.cond(A) that
# this matrix is as ill-conditioned as the experiment intends.
A[0][0] = 1e-2
scipy.io.savemat('A_02.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: one Laplace column and one uniform column.
    s2 = np.random.laplace(0.5, 1, n_points)
    s3 = np.random.uniform(-1, 1, n_points)

    S = np.c_[s2, s3]
    # Save the sources per sample size. A single 'S_02.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_02_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 2))
    scipy.io.savemat('X_02_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_02.mat', mdict={'S': S})

#### #03 - Gaussian + Laplace

Expectation: separation should occur without problems. Although we have a Gaussian distribution, by elimination we should be able to find it.

In [25]:
np.random.seed(3)

# Mixing matrix A (2 x 2), shared by every sample size of this case.
A = get_mixture_mat(N=2)
scipy.io.savemat('A_03.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: one Gaussian column and one Laplace column.
    s2 = np.random.normal(0, 1, n_points)
    s3 = np.random.laplace(0, 0.5, n_points)

    S = np.c_[s2, s3]
    # Save the sources per sample size. A single 'S_03.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_03_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 2))
    scipy.io.savemat('X_03_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_03.mat', mdict={'S': S})

#### #04 - Laplace + Gaussian + Binomial

Expectation: same as above.

In [26]:
np.random.seed(4)

# Mixing matrix A (3 x 3), shared by every sample size of this case.
A = get_mixture_mat(N=3)
scipy.io.savemat('A_04.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: binomial, Gaussian and Laplace columns (in that order).
    s1 = np.random.binomial(5, .5, n_points)
    s2 = np.random.normal(0, 1, n_points)
    s3 = np.random.laplace(0, 0.5, n_points)

    S = np.c_[s1, s2, s3]
    # Save the sources per sample size. A single 'S_04.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_04_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 3))
    scipy.io.savemat('X_04_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_04.mat', mdict={'S': S})

#### #05 - L1 + ... + L7 (7 Laplaces)

Expectation: separation should occur without problems, except for JADE.

In [27]:
np.random.seed(5)

# Mixing matrix A (7 x 7), shared by every sample size of this case.
A = get_mixture_mat(N=7)
scipy.io.savemat('A_05.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: seven independent Laplace draws with varying location and
    # scale (s5 and s7 use the same parameters but are separate draws).
    s1 = np.random.laplace(0, 0.5, n_points)
    s2 = np.random.laplace(0, 1, n_points)
    s3 = np.random.laplace(0, 3, n_points)
    s4 = np.random.laplace(-1, 1, n_points)
    s5 = np.random.laplace(-1, 2, n_points)
    s6 = np.random.laplace(1, 1, n_points)
    s7 = np.random.laplace(-1, 2, n_points)

    S = np.c_[s1, s2, s3, s4, s5, s6, s7]
    # Save the sources per sample size. A single 'S_05.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_05_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 7))
    scipy.io.savemat('X_05_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_05.mat', mdict={'S': S})

#### #06 - Gaussian + Gaussian + Laplace

Expectation: separation won't be possible.

In [28]:
np.random.seed(6)

# Mixing matrix A (3 x 3), shared by every sample size of this case.
A = get_mixture_mat(N=3)
scipy.io.savemat('A_06.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: two Gaussian columns and one Laplace column.
    s1 = np.random.normal(0.5, 3, n_points)
    s2 = np.random.normal(0, 1, n_points)
    s3 = np.random.laplace(0, 0.5, n_points)

    S = np.c_[s1, s2, s3]
    # Save the sources per sample size. A single 'S_06.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_06_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 3))
    scipy.io.savemat('X_06_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_06.mat', mdict={'S': S})

#### #07 - Laplace + (Uniform + noise)

Expectation: don't really know.

In [29]:
np.random.seed(7)

# Mixing matrix A (2 x 2), shared by every sample size of this case.
A = get_mixture_mat(N=2)
scipy.io.savemat('A_07.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # True sources: a Laplace column and a uniform column with additive
    # Gaussian noise (scaled by 0.2) on the uniform one.
    s1 = np.random.laplace(0, 0.5, n_points)
    s2 = np.random.uniform(0, 1, n_points) + 0.2*np.random.normal(size=n_points)

    S = np.c_[s1, s2]
    # Save the sources per sample size. A single 'S_07.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_07_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 2))
    scipy.io.savemat('X_07_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_07.mat', mdict={'S': S})

#### #08 - Temporal

Expectation: only FOBI should work.

In [30]:
np.random.seed(8)

# Mixing matrix A (3 x 3), shared by every sample size of this case.
A = get_mixture_mat(N=3)
scipy.io.savemat('A_08.mat', mdict={'A': A})

for n_points in [100, 1000, 100000]:

    # Common time axis: n_points evenly spaced samples over [0, 10].
    time = np.linspace(0, 10, n_points)

    s1 = np.sin(2 * np.pi * time)  # Signal 1 : sinusoidal signal
    s2 = np.sign(np.sin(3 * time))  # Signal 2 : square signal
    s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal

    S = np.c_[s1, s2, s3]
    # Save the sources per sample size. A single 'S_08.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_08_%d.mat' % n_points, mdict={'S': S})

    # Mix data: samples are rows, so X = S . A^T.
    X = np.dot(S, A.T)
    assert (X.shape == (n_points, 3))
    scipy.io.savemat('X_08_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_08.mat', mdict={'S': S})

#### #09 - Non-linear mixture of Laplace + Uniform

In [31]:
np.random.seed(9)

# Element-wise non-linear mixing functions. G[i][j] is applied to source i and
# contributes to mixture j, i.e. X[:, j] = sum_i G[i][j](S[:, i]).
G = np.array([
        [lambda x: x**2, lambda x: np.tanh(x)],
        [lambda x: x*np.tanh(x), lambda x: np.cosh(x)]
    ]).reshape(2, 2)

# X = G(S)

for n_points in [100, 1000, 100000]:

    # True sources: one Laplace column and one uniform column.
    s2 = np.random.laplace(0.5, 1, n_points)
    s3 = np.random.uniform(-1, 1, n_points)

    S = np.c_[s2, s3]
    # Save the sources per sample size. A single 'S_09.mat' written inside the
    # loop is overwritten on every pass, so only the last sample size survives
    # and the smaller mixtures lose their ground truth.
    scipy.io.savemat('S_09_%d.mat' % n_points, mdict={'S': S})

    # Mix data column by column. Samples are rows, so sources and mixtures
    # must be addressed as columns ([:, k]); indexing S[0] / X[0] would pick
    # the first *sample* and leave every row of X past the second at zero.
    X = np.zeros((n_points, 2))
    X[:, 0] = G[0][0](S[:, 0]) + G[1][0](S[:, 1])
    X[:, 1] = G[0][1](S[:, 0]) + G[1][1](S[:, 1])
    assert (X.shape == (n_points, 2))
    scipy.io.savemat('X_09_%d.mat' % n_points, mdict={'X': X})

# Legacy file name kept for existing consumers: as before, it ends up holding
# the sources of the last (largest) sample size.
scipy.io.savemat('S_09.mat', mdict={'S': S})