# Independent Component Analysis (ICA) - From Scratch

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the raw table, turn "?" placeholders into NaN, coerce every column to
# a numeric dtype (unparseable cells become NaN), and discard three columns.
df = (
    pd.read_csv('../../data.csv')
    .replace("?", np.nan)
    .apply(pd.to_numeric, errors='coerce')
    .drop(columns=['slope', 'ca', 'thal'])
)

# Impute the remaining gaps with the median of each column.
df = df.fillna(df.median())

# Feature matrix = every column except the target. The target key is spelled
# with trailing spaces here and must be matched exactly.
X = df.drop(columns=['num       ']).values

# Standardize the features before handing them to ICA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
class ICAScratch:
    """FastICA with a tanh contrast function and one-by-one deflation.

    The model centres and whitens the training data, then estimates one
    unmixing direction at a time with the fixed-point update, keeping each
    new direction orthogonal to the ones already found.

    Parameters
    ----------
    n_components : int
        Number of independent components to extract
        (``1 <= n_components <= n_features``).
    max_iter : int, default 200
        Maximum number of fixed-point iterations per component.
    tol : float, default 1e-4
        A component is considered converged when ``| |<w_old, w_new>| - 1 |``
        drops below this value.
    random_state : int or None, default None
        Seed for the random initial unmixing matrix. When ``None`` the global
        ``np.random`` state is used (so ``np.random.seed`` still applies).

    Attributes
    ----------
    W : ndarray of shape (n_components, n_features) or None
        Unmixing matrix expressed in the *whitened* feature space.
    mean_ : ndarray of shape (n_features,) or None
        Per-feature mean of the training data.
    whitening_ : ndarray of shape (n_features, n_features) or None
        Symmetric (ZCA) whitening matrix learned from the training data.
    """

    def __init__(self, n_components, max_iter=200, tol=1e-4, random_state=None):
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state
        self.W = None
        self.mean_ = None
        self.whitening_ = None

    def g(self, x):
        """Contrast non-linearity: ``tanh(x)``."""
        return np.tanh(x)

    def g_prime(self, x):
        """Derivative of :meth:`g`: ``1 - tanh(x)**2``."""
        return 1 - np.tanh(x)**2

    def _whitening_params(self, X):
        """Return ``(mean, K)`` such that ``(X - mean) @ K`` is whitened."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] < 2:
            raise ValueError(
                "X must be a 2-D array with at least two samples, got shape "
                f"{X.shape}"
            )
        mean = X.mean(axis=0)
        # atleast_2d: np.cov collapses to a 0-d array for a single feature.
        cov = np.atleast_2d(np.cov((X - mean).T))
        eigvals, eigvecs = np.linalg.eigh(cov)
        # Round-off can make tiny eigenvalues slightly negative; clip them so
        # the square root stays real, and keep the small ridge for stability.
        inv_sqrt = 1. / np.sqrt(np.clip(eigvals, 0., None) + 1e-10)
        K = np.dot(eigvecs * inv_sqrt, eigvecs.T)
        return mean, K

    def whiten(self, X):
        """Centre ``X`` and whiten it using its own mean and covariance.

        Returns an array of the same shape as ``X``. No state is stored on
        the instance.
        """
        mean, K = self._whitening_params(X)
        return np.dot(np.asarray(X, dtype=float) - mean, K)

    def fit(self, X):
        """Estimate the whitening transform and the unmixing matrix from ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D with at least two samples, or if
            ``n_components`` is not in ``[1, n_features]``.
        """
        X = np.asarray(X, dtype=float)
        mean, K = self._whitening_params(X)
        n_features = X.shape[1]
        if not 1 <= self.n_components <= n_features:
            raise ValueError(
                f"n_components={self.n_components} must be between 1 and "
                f"n_features={n_features}"
            )
        Z = np.dot(X - mean, K)

        rng = (np.random if self.random_state is None
               else np.random.RandomState(self.random_state))
        W = rng.rand(self.n_components, n_features)

        for i in range(self.n_components):
            w = W[i] / np.linalg.norm(W[i])

            for _ in range(self.max_iter):
                proj = np.dot(Z, w)  # computed once, used by both g and g'
                w_new = ((Z * self.g(proj)[:, np.newaxis]).mean(axis=0)
                         - self.g_prime(proj).mean() * w)
                if i > 0:
                    # Deflation: remove the part lying in the span of the
                    # components that are already fixed.
                    w_new -= np.dot(np.dot(w_new, W[:i].T), W[:i])
                w_new /= np.linalg.norm(w_new)

                converged = np.abs(np.abs(np.dot(w, w_new)) - 1) < self.tol
                # Always keep the newest (deflated, normalised) estimate,
                # including on the iteration that triggers convergence.
                w = w_new
                if converged:
                    break

            W[i, :] = w

        self.mean_ = mean
        self.whitening_ = K
        self.W = W
        return self

    def transform(self, X):
        """Project ``X`` onto the learned independent components.

        ``X`` is centred with the training mean and whitened with the training
        whitening matrix before the unmixing matrix is applied, so the result
        lives in the same space the model was fitted in.

        Returns
        -------
        ndarray of shape (n_samples, n_components)

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.W is None or self.whitening_ is None or self.mean_ is None:
            raise RuntimeError(
                "ICAScratch must be fitted before calling transform()."
            )
        X = np.asarray(X, dtype=float)
        return np.dot(np.dot(X - self.mean_, self.whitening_), self.W.T)

In [None]:
# The initial unmixing matrix is drawn from NumPy's global RNG; seed it so the
# extracted components (and the plots built from them) are the same on every
# run of the notebook.
np.random.seed(42)

# Fit a two-component model on the standardized features, then project the
# same data onto the learned components.
ica = ICAScratch(n_components=2)
ica.fit(X_scaled)
X_transformed = ica.transform(X_scaled)

In [None]:
sns.set_style("white")
fig, (ax_scatter, ax_density) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: the two extracted components against each other, coloured by
# the target column.
scatter = ax_scatter.scatter(
    X_transformed[:, 0],
    X_transformed[:, 1],
    c=df['num       '],
    cmap='coolwarm',
    s=60,
    alpha=0.7,
    edgecolor='white',
)
ax_scatter.set_title('ICA Components: Feature Independence', fontsize=14)
ax_scatter.set_xlabel('Independent Component 1')
ax_scatter.set_ylabel('Independent Component 2')
fig.colorbar(scatter, ax=ax_scatter, label='Target (Heart Disease)')

# Right panel: kernel density estimate of each component's marginal
# distribution, overlaid on shared axes.
kde_styles = [('teal', 'Component 1'), ('salmon', 'Component 2')]
for column, (colour, legend_label) in enumerate(kde_styles):
    sns.kdeplot(
        X_transformed[:, column],
        fill=True,
        color=colour,
        label=legend_label,
        alpha=0.3,
        ax=ax_density,
    )
ax_density.set_title('Non-Gaussianity of Components', fontsize=14)
ax_density.set_xlabel('Value')
ax_density.set_ylabel('Density')
ax_density.legend()

fig.tight_layout()
plt.show()