Skip to content

Commit

Permalink
feat(preprocessors): implement PCA algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
matthieugouel committed Aug 7, 2019
1 parent af64e44 commit 911ed7c
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -34,6 +34,7 @@ Alchina is a Machine Learning framework.

- Min-max normalization
- Standardization
- PCA

**Metrics**

Expand Down
55 changes: 55 additions & 0 deletions alchina/preprocessors.py
Expand Up @@ -4,6 +4,8 @@

from typing import Optional

from .utils import features_reshape


class Normalization(object):
"""Rescale the data via a normalization.
Expand Down Expand Up @@ -39,3 +41,56 @@ def __call__(self, X, axis: int = 0):
self.sigma = np.ones_like(self.sigma)

return np.divide(X - self.mu, self.sigma)


class PCA(object):
    """Principal Component Analysis.

    Projects data onto the leading left-singular vectors of its covariance
    matrix, and scores samples with a Gaussian log-likelihood built from the
    fitted mean and covariance.

    Parameters
    ----------
    n_components : int, optional
        Number of components kept by `transform`. When left to ``None`` it is
        set during `fit` to ``min(X.shape)``.
    """

    def __init__(self, n_components: Optional[int] = None):
        self.n_components = n_components

        # Fitted state: all three stay ``None`` until `fit` has run.
        self._covariance_matrix = None
        self._U_reduced = None
        self._mean = None

    def fit(self, X):
        """Estimate the mean, the covariance and the projection basis from ``X``.

        Raises
        ------
        ValueError
            If ``n_components`` is larger than ``min(X.shape)``.
        """
        X = features_reshape(X)
        max_dimension = min(X.shape)

        if self.n_components is None:
            self.n_components = max_dimension
        elif self.n_components > max_dimension:
            raise ValueError(f"n_components must be lesser than {max_dimension}")

        # `np.cov` yields a 0-d array when there is a single variable; keep the
        # covariance 2-D so the SVD below and the inverse in `score_samples`
        # always receive a matrix.
        self._covariance_matrix = np.atleast_2d(np.cov(X.T))
        self._mean = np.mean(X, axis=0)

        # Columns of U are ordered by decreasing singular value, so slicing
        # the first columns keeps the leading components.
        U, _, _ = np.linalg.svd(self._covariance_matrix)
        self._U_reduced = U[:, : self.n_components]

    def transform(self, X):
        """Project ``X`` onto the retained components.

        If the model has not been fitted yet, it is fitted on ``X`` first.
        """
        # Explicit identity checks: `None in (array, ...)` compares the fitted
        # arrays element-wise and raises "truth value ... is ambiguous".
        if self._U_reduced is None or self._mean is None:
            self.fit(X)

        return (X - self._mean).dot(self._U_reduced)

    def score_samples(self, X):
        """Compute the log-likelihood of all samples.

        Uses the mean and covariance stored by `fit`, so the model must have
        been fitted (directly or through `transform`) beforehand.
        """
        X = features_reshape(X)

        n_features = X.shape[1]
        precision = np.linalg.inv(self._covariance_matrix)
        residuals = X - self._mean

        # Per-sample Gaussian log-density:
        # -1/2 * (log|cov| + r^T . precision . r + d * log(2*pi)),
        # with log|cov| written as -log|precision|.
        return -(1 / 2) * (
            -np.log(np.linalg.det(precision))
            + np.sum((residuals * np.dot(residuals, precision)), axis=1)
            + n_features * np.log(2 * np.pi)
        )

    def score(self, X):
        """Compute the mean of log-likelihood of all samples."""
        X = features_reshape(X)
        return np.mean(self.score_samples(X))
33 changes: 32 additions & 1 deletion tests/test_preprocessors.py
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from alchina.preprocessors import Normalization, Standardization
from alchina.preprocessors import Normalization, Standardization, PCA


# --- Normalization ---
Expand Down Expand Up @@ -31,3 +31,34 @@ def test_standardization():

assert np.mean(X_stand) == pytest.approx(0)
assert np.std(X_stand) == pytest.approx(1)


# --- PCA ---


def test_pca_with():
    """Test of `PCA` class with the default number of components."""
    # Locally seeded generator: reproducible data without touching the
    # global numpy random state shared with other tests.
    X = np.random.RandomState(0).normal(0, 1, (150, 2))

    pca = PCA()
    X_reduced = pca.transform(X)

    # With `n_components` unset, every component of the 2-feature input is kept.
    assert X_reduced.shape == (150, 2)
    assert pca.score(X) < 0


def test_pca_with_specified_n_components():
    """Test of `PCA` class with an explicit, valid `n_components`."""
    # Locally seeded generator: reproducible data without touching the
    # global numpy random state shared with other tests.
    X = np.random.RandomState(0).normal(0, 1, (150, 2))

    pca = PCA(n_components=1)
    X_reduced = pca.transform(X)

    # Only the requested single component must remain after projection.
    assert X_reduced.shape == (150, 1)
    assert pca.score(X) < 0


def test_pca_with_invalid_n_components():
    """`PCA` must reject an `n_components` larger than the data allows."""
    samples = np.random.normal(0, 1, (150, 2))
    reducer = PCA(n_components=5)

    # The model is unfitted, so `transform` fits first and the check fires there.
    with pytest.raises(ValueError):
        reducer.transform(samples)

0 comments on commit 911ed7c

Please sign in to comment.