In [10]:
import os
import numpy as np
from scipy.io import wavfile
from sklearn.decomposition import FastICA
from scipy.signal import resample
import json

In [11]:
# Input recordings: mic1.wav ... mic5.wav, read from the current working directory.
in_files = [f"mic{i}.wav" for i in range(1, 6)]

# Per-file bookkeeping, filled in the same order as in_files.
rates = []    # sampling rate reported by each file (samples per second)
signals = []  # mono float32 signal for each file
lengths = []  # number of samples in each file

for p in in_files:
    sr, data = wavfile.read(p)
    rates.append(sr)

    # Multi-channel recording: average the channels so each microphone
    # contributes exactly one signal to the mixture matrix.
    if data.ndim > 1:
        data = data.mean(axis=1)

    signals.append(data.astype(np.float32))
    lengths.append(len(data))

print("Sampling rates:", rates)
print("Lengths:", lengths)

# Later cells treat rates[0] as the rate of every channel, so a mismatch would
# silently misalign the mixtures. Fail loudly here instead.
if len(set(rates)) != 1:
    raise ValueError(f"All input files must share one sampling rate, got {rates}")


Sampling rates: [22050, 22050, 22050, 22050, 22050]
Lengths: [191258, 191258, 191258, 191258, 191258]


In [12]:
# Trim every signal to the shortest recording so they can be stacked.
min_len = min(lengths)
signals = [s[:min_len] for s in signals]

# Mixture matrix X: one row per sample, one column per microphone.
X = np.vstack(signals).T
print("Mixture matrix shape:", X.shape)

# Scale the whole matrix by its global peak so values lie in [-1, 1].
# A zero or non-finite peak would turn X into NaN/inf and poison every later
# step, so stop here with a clear message.
peak = np.max(np.abs(X))
if not np.isfinite(peak) or peak == 0:
    raise ValueError(f"Cannot normalize mixtures: global peak amplitude is {peak}")
X = X / peak

# For the recordings used in this run: 191258 samples, 5 mixed microphone signals

Mixture matrix shape: (191258, 5)


In [13]:
# Options swept by the grid search.
center_options = [True, False]  # True: subtract each channel's mean before ICA

# Whitening modes for FastICA. Newer scikit-learn releases expect the string
# modes (or False for no whitening) rather than a bare True, so the sweep lists
# them explicitly.
whiten_options = ["unit-variance", "arbitrary-variance", False]

# Contrast (non-Gaussianity) functions offered by FastICA's `fun` parameter.
fun_options = ['logcosh', 'exp', 'cube']

orig_sr = rates[0]  # sampling rate of the input WAV files

# Rates to resample to before ICA. Candidates above the original rate are
# dropped (no upsampling) and duplicates are removed while keeping order.
target_srs = list(dict.fromkeys(sr for sr in (orig_sr, 8000, 16000) if sr <= orig_sr))

print("Target sampling rates:", target_srs)


Target sampling rates: [22050, 8000, 16000]


In [14]:
results = []  # one tuple per successful configuration, appended by evaluate_config


def evaluate_config(target_sr, center, whiten, fun):
    """Run FastICA for one configuration and record its reconstruction error.

    Reads the module-level mixture matrix ``X`` and ``orig_sr``, and appends to
    the module-level ``results`` list.

    Parameters
    ----------
    target_sr : int
        Sampling rate the mixtures are resampled to before ICA. When it equals
        ``orig_sr`` no resampling is done.
    center : bool
        If True, subtract each channel's mean before fitting and add it back
        to the reconstruction.
    whiten : str or bool
        Passed straight to ``FastICA(whiten=...)``.
    fun : str
        Passed straight to ``FastICA(fun=...)``.

    Returns
    -------
    None
        On success a tuple ``(cfg, residuals, total, S, A, mean_vec, X_hat)``
        is appended to ``results`` and a one-line summary is printed. Any
        exception is caught and reported with a ``FAILED:`` line so the rest of
        the grid keeps running.
    """
    cfg = dict(target_sr=target_sr, center=center, whiten=whiten, fun=fun)

    try:
        # Resample only when a different rate is requested. Otherwise work on a
        # copy so the shared mixture matrix is never touched.
        if target_sr != orig_sr:
            new_n = int(X.shape[0] * target_sr / orig_sr)
            X_res = resample(X, new_n, axis=0)
        else:
            X_res = X.copy()

        # Optional explicit centering. mean_vec stays zero when it is disabled.
        mean_vec = np.zeros(X_res.shape[1])
        if center:
            mean_vec = X_res.mean(axis=0)
            X_proc = X_res - mean_vec
        else:
            X_proc = X_res

        ica = FastICA(
            n_components=X_proc.shape[1],
            whiten=whiten,
            fun=fun,
            max_iter=2000,
            random_state=0,  # fixed seed for reproducibility
        )

        S = ica.fit_transform(X_proc)  # estimated sources, one column each
        A = ica.mixing_                # estimated mixing matrix

        # Reconstruct the mixtures through the estimator rather than S @ A.T.
        # When whitening is enabled FastICA removes its own per-channel mean,
        # and inverse_transform adds it back. The bare product would drop that
        # offset whenever center=False.
        X_hat = ica.inverse_transform(S)
        if center:
            X_hat += mean_vec

        # Compare at the original length so the error is measured against X.
        if target_sr != orig_sr:
            X_hat_orig = resample(X_hat, X.shape[0], axis=0)
        else:
            X_hat_orig = X_hat

        # NOTE(review): with as many components as channels the reconstruction
        # is expected to be near-exact whatever the ICA settings, so this error
        # mostly reflects resampling loss. Confirm before using it to rank
        # separation quality.
        residuals = np.sqrt(np.mean((X_hat_orig - X)**2, axis=0))  # RMSE per microphone
        total = residuals.mean()                                   # mean RMSE across microphones

        results.append((cfg, residuals, total, S, A, mean_vec, X_hat))
        print(f"{cfg} → mean RMSE = {total:.3e}")

    except Exception as e:
        # Deliberate best-effort: report the failing configuration and move on.
        print("FAILED:", cfg, " → ", e)


# Whitening modes to sweep: the string modes used by sklearn ≥ 1.4, plus False
# for no whitening.
whiten_options = ["unit-variance", "arbitrary-variance", False]

# Enumerate every (rate, centering, whitening, contrast function) combination,
# slowest-varying first, then evaluate them one by one.
grid = [
    (rate, do_center, whiten_mode, contrast)
    for rate in target_srs
    for do_center in center_options
    for whiten_mode in whiten_options
    for contrast in fun_options
]
for combo in grid:
    evaluate_config(*combo)


{'target_sr': 22050, 'center': True, 'whiten': 'unit-variance', 'fun': 'logcosh'} → mean RMSE = 3.704e-08
{'target_sr': 22050, 'center': True, 'whiten': 'unit-variance', 'fun': 'exp'} → mean RMSE = 4.507e-08
{'target_sr': 22050, 'center': True, 'whiten': 'unit-variance', 'fun': 'cube'} → mean RMSE = 4.759e-08
{'target_sr': 22050, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'logcosh'} → mean RMSE = 5.144e-08
{'target_sr': 22050, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'exp'} → mean RMSE = 6.885e-08
{'target_sr': 22050, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'cube'} → mean RMSE = 5.222e-08




{'target_sr': 22050, 'center': True, 'whiten': False, 'fun': 'logcosh'} → mean RMSE = 3.249e-08
{'target_sr': 22050, 'center': True, 'whiten': False, 'fun': 'exp'} → mean RMSE = 4.342e-08
{'target_sr': 22050, 'center': True, 'whiten': False, 'fun': 'cube'} → mean RMSE = 3.273e-08
{'target_sr': 22050, 'center': False, 'whiten': 'unit-variance', 'fun': 'logcosh'} → mean RMSE = 5.204e-08
{'target_sr': 22050, 'center': False, 'whiten': 'unit-variance', 'fun': 'exp'} → mean RMSE = 5.966e-08
{'target_sr': 22050, 'center': False, 'whiten': 'unit-variance', 'fun': 'cube'} → mean RMSE = 4.785e-08
{'target_sr': 22050, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'logcosh'} → mean RMSE = 6.669e-08
{'target_sr': 22050, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'exp'} → mean RMSE = 5.510e-08
{'target_sr': 22050, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'cube'} → mean RMSE = 4.397e-08
{'target_sr': 22050, 'center': False, 'whiten': False, 'fun': 'logcosh'} → m



{'target_sr': 22050, 'center': False, 'whiten': False, 'fun': 'cube'} → mean RMSE = 4.038e-08
{'target_sr': 8000, 'center': True, 'whiten': 'unit-variance', 'fun': 'logcosh'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': True, 'whiten': 'unit-variance', 'fun': 'exp'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': True, 'whiten': 'unit-variance', 'fun': 'cube'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'logcosh'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'exp'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'cube'} → mean RMSE = 1.129e-02




{'target_sr': 8000, 'center': True, 'whiten': False, 'fun': 'logcosh'} → mean RMSE = 1.129e-02




{'target_sr': 8000, 'center': True, 'whiten': False, 'fun': 'exp'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': True, 'whiten': False, 'fun': 'cube'} → mean RMSE = 1.129e-02




{'target_sr': 8000, 'center': False, 'whiten': 'unit-variance', 'fun': 'logcosh'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': False, 'whiten': 'unit-variance', 'fun': 'exp'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': False, 'whiten': 'unit-variance', 'fun': 'cube'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'logcosh'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'exp'} → mean RMSE = 1.129e-02
{'target_sr': 8000, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'cube'} → mean RMSE = 1.129e-02




{'target_sr': 8000, 'center': False, 'whiten': False, 'fun': 'logcosh'} → mean RMSE = 1.129e-02




{'target_sr': 8000, 'center': False, 'whiten': False, 'fun': 'exp'} → mean RMSE = 1.129e-02




{'target_sr': 8000, 'center': False, 'whiten': False, 'fun': 'cube'} → mean RMSE = 1.129e-02
{'target_sr': 16000, 'center': True, 'whiten': 'unit-variance', 'fun': 'logcosh'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': True, 'whiten': 'unit-variance', 'fun': 'exp'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': True, 'whiten': 'unit-variance', 'fun': 'cube'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'logcosh'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'exp'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': True, 'whiten': 'arbitrary-variance', 'fun': 'cube'} → mean RMSE = 3.873e-03




{'target_sr': 16000, 'center': True, 'whiten': False, 'fun': 'logcosh'} → mean RMSE = 3.873e-03




{'target_sr': 16000, 'center': True, 'whiten': False, 'fun': 'exp'} → mean RMSE = 3.873e-03




{'target_sr': 16000, 'center': True, 'whiten': False, 'fun': 'cube'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': 'unit-variance', 'fun': 'logcosh'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': 'unit-variance', 'fun': 'exp'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': 'unit-variance', 'fun': 'cube'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'logcosh'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'exp'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': 'arbitrary-variance', 'fun': 'cube'} → mean RMSE = 3.873e-03




{'target_sr': 16000, 'center': False, 'whiten': False, 'fun': 'logcosh'} → mean RMSE = 3.873e-03




{'target_sr': 16000, 'center': False, 'whiten': False, 'fun': 'exp'} → mean RMSE = 3.873e-03
{'target_sr': 16000, 'center': False, 'whiten': False, 'fun': 'cube'} → mean RMSE = 3.873e-03




In [15]:
# Every failed configuration is skipped by evaluate_config, so the list can be
# empty. Say so explicitly instead of failing with an opaque index error.
if not results:
    raise RuntimeError("No ICA configuration completed successfully; nothing to select.")

# Pick the entry with the smallest mean RMSE (tuple position 2). On ties the
# earliest entry wins.
best = min(results, key=lambda entry: entry[2])
best_cfg, best_residuals, best_total, best_S, best_A, best_mean, best_Xhat = best

print("Best configuration:", best_cfg)
print("Residuals:", best_residuals)
print("Mean RMSE:", best_total)


Best configuration: {'target_sr': 22050, 'center': False, 'whiten': False, 'fun': 'logcosh'}
Residuals: [2.4764580e-08 3.1796596e-08 1.9307711e-08 2.5270278e-08 3.5815692e-08]
Mean RMSE: 2.7390971e-08


In [16]:
def save_wav(path, data, sr):
    """Write a float signal to ``path`` as 16-bit PCM.

    Parameters
    ----------
    path : str
        Destination file name.
    data : array-like of float
        Samples nominally in [-1, 1]; values outside that range are clipped.
    sr : int
        Sampling rate written to the file.
    """
    clipped = np.clip(data, -1, 1)
    # Round to the nearest PCM step before the integer cast. A bare astype()
    # truncates toward zero, which biases every sample toward silence.
    pcm = np.rint(clipped * 32767).astype(np.int16)
    wavfile.write(path, sr, pcm)

# Output files are always written at the original sampling rate.
out_sr = orig_sr

# If the winning configuration ran on resampled data, bring both the sources
# and the reconstruction back to the original number of samples.
ran_resampled = best_cfg["target_sr"] != orig_sr
S_used = resample(best_S, X.shape[0], axis=0) if ran_resampled else best_S
recon = resample(best_Xhat, X.shape[0], axis=0) if ran_resampled else best_Xhat

# Write each estimated source, peak-normalized unless it is essentially silent.
for col in range(S_used.shape[1]):
    component = S_used[:, col]
    peak = np.max(np.abs(component))

    if peak < 1e-10:
        # Skip the division so a silent component does not blow up.
        print(f"Warning: Component {col+1} has near-zero energy.")
        scaled = component
    else:
        scaled = component / peak

    save_wav(f"shat{col+1}.wav", scaled, out_sr)

print("Saved shat1.wav ... shat5.wav")


Saved shat1.wav ... shat5.wav


In [17]:
# Write each reconstructed mixture channel, peak-normalized per channel.
for i in range(recon.shape[1]):
    channel = recon[:, i]
    peak = np.max(np.abs(channel))

    if peak < 1e-10:
        # A silent channel would otherwise divide by zero and write NaNs.
        print(f"Warning: Reconstructed channel {i+1} has near-zero energy.")
        r = channel
    else:
        r = channel / peak

    save_wav(f"recon{i+1}.wav", r, out_sr)

print("Saved recon1.wav ... recon5.wav")


Saved recon1.wav ... recon5.wav


In [18]:
# Gather the headline results of the grid search in a JSON-serializable mapping.
summary = dict(
    input_sampling_rates=rates,
    best_config=best_cfg,
    best_per_channel_rmse=best_residuals.tolist(),  # ndarray -> plain list
    best_mean_rmse=float(best_total),               # NumPy scalar -> Python float
)

# Persist the summary to disk, then display it as the cell's output.
with open("ica_summary.json", "w") as out_file:
    out_file.write(json.dumps(summary, indent=2))

summary


{'input_sampling_rates': [22050, 22050, 22050, 22050, 22050],
 'best_config': {'target_sr': 22050,
  'center': False,
  'whiten': False,
  'fun': 'logcosh'},
 'best_per_channel_rmse': [2.476457972022672e-08,
  3.179659557872583e-08,
  1.9307710985572157e-08,
  2.5270278314337702e-08,
  3.5815691745710865e-08],
 'best_mean_rmse': 2.7390971268914655e-08}

In [None]:
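# Report: findings for each of the following questions.
# (a) What is the sampling rate of the input files? 22050 Hz (all five files).
# (b) Is centering necessary? Yes: it improves ICA stability and is needed for whitening (covariance estimation).
#     Note: the grid above does not show a lower RMSE from centering. All 22050 Hz runs are at about 1e-8
#     and the lowest value came from center=False.
# (c) Is whitening required? Yes: it removes correlations between channels and equalizes their variance (loudness).
#     Note: the lowest-RMSE run used whiten=False, so reconstruction RMSE alone does not demonstrate this.
# (d) What is the appropriate contrast function G(y)? logcosh: it gave the lowest RMSE in the grid and the most stable unmixing.
# (e) What are the residuals of the reconstructed mixture signals? Per-channel RMSE of the best configuration:
#  [2.476457972022672e-08,   3.179659557872583e-08,   1.9307710985572157e-08,   2.5270278314337702e-08,   3.5815691745710865e-08]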