Skip to content

Commit

Permalink
feat(project): structure refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
matthieu committed Aug 8, 2019
1 parent b3791aa commit b2bc8f1
Show file tree
Hide file tree
Showing 20 changed files with 140 additions and 105 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Alchina is a Machine Learning framework.

**Clusters**

- K-Means clustering
- K-Means

**Optimizers**

Expand Down
6 changes: 6 additions & 0 deletions alchina/classifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Classifiers."""

from .linear import LinearClassifier, RidgeClassifier
from .knn import KNNClassifier

__all__ = ["LinearClassifier", "RidgeClassifier", "KNNClassifier"]
20 changes: 10 additions & 10 deletions alchina/classifiers.py → alchina/classifiers/linear.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
"""Classifiers."""
"""Linear Classifiers."""

import numpy as np

from abc import ABC, abstractmethod

from .exceptions import InvalidInput, NotFitted
from .metrics import accuracy_score
from .optimizers import GradientDescent
from .preprocessors import Standardization
from .utils import check_dataset_consistency, features_reshape
from alchina.exceptions import InvalidInput, NotFitted
from alchina.metrics import accuracy_score
from alchina.optimizers import GradientDescent
from alchina.preprocessors import Standardization
from alchina.utils import check_dataset_consistency, features_reshape


class AbstractClassifier(ABC):
"""Abstract class for classifiers algorithms."""
class AbstractLinearClassifier(ABC):
"""Abstract class for linear classifiers algorithms."""

def __init__(self, *args, optimizer=None, standardize: bool = True, **kwargs):
self.standardize = Standardization() if standardize else None
Expand Down Expand Up @@ -88,7 +88,7 @@ def score(self, X, y):
return accuracy_score(self.predict(X), y)


class LinearClassifier(AbstractClassifier):
class LinearClassifier(AbstractLinearClassifier):
"""Linear classifier (logistic regressor)."""

def sigmoid(self, z):
Expand All @@ -111,7 +111,7 @@ def gradient(self, X, y, theta):
return X.T.dot(self.hypothesis(X, theta) - y)


class RidgeClassifier(LinearClassifier):
class RidgeClassifier(AbstractLinearClassifier):
"""Regularized linear classifier."""

def __init__(self, *args, regularization: float = 1, **kwargs):
Expand Down
5 changes: 5 additions & 0 deletions alchina/clusters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Clusters."""

from .kmeans import KMeans

__all__ = ["KMeans"]
4 changes: 2 additions & 2 deletions alchina/clusters.py → alchina/clusters/kmeans.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Clusters."""
"""K-Means."""

import numpy as np


class KMeans(object):
"""K-means algorithm."""
"""K-Means algorithm."""

def __init__(self, n_centroids, max_iterations=300):
self.n_centroids = n_centroids
Expand Down
5 changes: 5 additions & 0 deletions alchina/optimizers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Optimizers."""

from .gradient_descent import GradientDescent, SGD, MBGD

__all__ = ["GradientDescent", "SGD", "MBGD"]
14 changes: 7 additions & 7 deletions alchina/optimizers.py → alchina/optimizers/gradient_descent.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
"""Optimizers algorithms."""
"""Gradient Descent Optimizers algorithms."""

import numpy as np

from abc import ABC, abstractmethod
from typing import Optional

from .exceptions import InvalidInput, NotBuilt
from .utils import check_dataset_consistency, shuffle_dataset
from alchina.exceptions import InvalidInput, NotBuilt
from alchina.utils import check_dataset_consistency, shuffle_dataset


class AbstractOptimizer(ABC):
class AbstractGDOptimizer(ABC):
"""Abstract class for optimizers algorithms."""

def __init__(self, iterations: int = 100, history: bool = False):
Expand All @@ -32,7 +32,7 @@ def __call__(self):
pass # pragma: no cover


class GradientDescent(AbstractOptimizer):
class GradientDescent(AbstractGDOptimizer):
"""Batch gradient descent."""

def __init__(self, *args, learning_rate: float = 0.01, **kwargs):
Expand Down Expand Up @@ -60,7 +60,7 @@ def __call__(self, X, y, *args, **kwargs):
return self.parameters


class SGD(AbstractOptimizer):
class SGD(AbstractGDOptimizer):
"""Stochastic gradient descent."""

def __init__(
Expand Down Expand Up @@ -92,7 +92,7 @@ def __call__(self, X, y, *args, **kwargs):
return self.parameters


class MBGD(AbstractOptimizer):
class MBGD(AbstractGDOptimizer):
"""Mini-batch gradient descent."""

def __init__(
Expand Down
6 changes: 6 additions & 0 deletions alchina/preprocessors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Preprocessors."""

from .data import Normalization, Standardization
from .pca import PCA

__all__ = ["Normalization", "Standardization", "PCA"]
41 changes: 41 additions & 0 deletions alchina/preprocessors/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Data Preprocessors."""

import numpy as np

from typing import Optional


class Normalization(object):
    """Rescale data into the range [0, 1] via min-max normalization.

    Each value is mapped to ``(x - min) / (max - min)`` along the given
    axis, so the smallest value becomes 0 and the largest becomes 1.
    """

    def __call__(self, X, axis: int = 0):
        """Return a normalized copy of `X`.

        Args:
            X: input array (any shape accepted by numpy reductions).
            axis: axis along which the minimum and maximum are computed.

        Returns:
            Array of the same shape as `X` with all values in [0, 1].
        """
        min_x = np.amin(X, axis=axis)
        max_x = np.amax(X, axis=axis)
        # Guard against a zero range (constant feature): dividing by zero
        # would yield NaN, so substitute 1 — mirroring the zero-sigma
        # handling in `Standardization`. Constant features map to 0.
        range_x = np.where(max_x == min_x, 1, max_x - min_x)
        return (X - min_x) / range_x


class Standardization(object):
    """Rescale data to zero mean and unit standard deviation.

    Produces:
    - mean(X_standardized) = 0
    - std(X_standardized) = 1

    The mean and standard deviation are computed from the data on the
    first call and cached, so subsequent calls reuse the fitted values.
    """

    def __init__(self, mu: Optional[float] = None, sigma: Optional[float] = None):
        # `mu`/`sigma` may be scalars or per-feature arrays; when omitted
        # they are fitted lazily on the first call.
        self.mu = mu
        self.sigma = sigma

    def __call__(self, X, axis: int = 0):
        """Return a standardized copy of `X` using the cached parameters.

        Args:
            X: input array.
            axis: axis along which the mean and deviation are computed
                (only used on the fitting call).

        Returns:
            Array of the same shape as `X`, centered and scaled.
        """
        if self.mu is None or self.sigma is None:
            self.mu = np.mean(X, axis=axis)
            self.sigma = np.std(X, axis=axis)

        # A zero standard deviation (constant feature) would cause a
        # division by zero. Replace zero entries element-wise with 1 so
        # the corresponding feature is simply centered. (The previous
        # `np.any` check only triggered when EVERY sigma was zero.)
        self.sigma = np.where(self.sigma == 0, 1, self.sigma)

        return np.divide(X - self.mu, self.sigma)
44 changes: 4 additions & 40 deletions alchina/preprocessors.py → alchina/preprocessors/pca.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,15 @@
"""Preprocessors."""
"""Principal Component Analysis."""

import numpy as np

from typing import Optional

from .exceptions import InvalidInput, NotFitted
from .utils import features_reshape


class Normalization(object):
"""Rescale the data via a normalization.
Produce:
- Bring all values into the range [0, 1]
"""

def __call__(self, X, axis: int = 0):
min_x = np.amin(X, axis=axis)
max_x = np.amax(X, axis=axis)
return (X - min_x) / (max_x - min_x)


class Standardization(object):
"""Rescale the data via a standardization
Produce:
- mean(Xstandardized) = 0
- std(Xstandardized) = 1
"""

def __init__(self, mu: Optional[int] = None, sigma: Optional[int] = None):
self.mu = mu
self.sigma = sigma

def __call__(self, X, axis: int = 0):
if self.mu is None or self.sigma is None:
self.mu = np.mean(X, axis=axis)
self.sigma = np.std(X, axis=axis)

if not np.any(self.sigma):
self.sigma = np.ones_like(self.sigma)

return np.divide(X - self.mu, self.sigma)
from alchina.exceptions import InvalidInput, NotFitted
from alchina.utils import features_reshape


class PCA(object):
"""Principal Component Analysis."""
"""Principal Component Analysis algorithm."""

def __init__(self, n_components: Optional[int] = None):
self.n_components = n_components
Expand Down
5 changes: 5 additions & 0 deletions alchina/regressors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Regressors."""

from .linear import LinearRegressor, RidgeRegressor

__all__ = ["LinearRegressor", "RidgeRegressor"]
16 changes: 8 additions & 8 deletions alchina/regressors.py → alchina/regressors/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

from abc import ABC, abstractmethod

from .exceptions import InvalidInput, NotFitted
from .metrics import r2_score
from .optimizers import GradientDescent
from .preprocessors import Standardization
from .utils import check_dataset_consistency, features_reshape
from alchina.exceptions import InvalidInput, NotFitted
from alchina.metrics import r2_score
from alchina.optimizers import GradientDescent
from alchina.preprocessors import Standardization
from alchina.utils import check_dataset_consistency, features_reshape


class AbstractRegressor(ABC):
class AbstractLinearRegressor(ABC):
"""Abstract class for regressors algorithms."""

def __init__(self, *args, optimizer=None, standardize: bool = True, **kwargs):
Expand Down Expand Up @@ -71,7 +71,7 @@ def score(self, X, y):
return r2_score(self.predict(X), y)


class LinearRegressor(AbstractRegressor):
class LinearRegressor(AbstractLinearRegressor):
"""Linear regressor."""

def hypothesis(self, X, theta):
Expand All @@ -95,7 +95,7 @@ def normal(self, X, y):
self.optimizer.parameters = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)


class RidgeRegressor(AbstractRegressor):
class RidgeRegressor(AbstractLinearRegressor):
"""Ridge regressor."""

def __init__(self, *args, regularization: float = 1, **kwargs):
Expand Down
3 changes: 0 additions & 3 deletions examples/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@
kmeans = KMeans(n_centroids=2)
kmeans.fit(X)

# Display the number of model iterations
print(f"K-Means iterations : {kmeans.iterations}")

# Plot the results
A = X[np.array(kmeans.indexes) == 0]
B = X[np.array(kmeans.indexes) == 1]
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Classifiers tests."""
"""Linear Classifiers tests."""

import numpy as np
import pytest
Expand Down Expand Up @@ -75,7 +75,7 @@ def test_linear_classifier_history_disabled():


def test_linear_classifier_multiclass():
"""Test of `LinearClassifier` with no multiclass."""
"""Test of `LinearClassifier` with multiclass."""
lc = LinearClassifier(learning_rate=0.1, iterations=2)

X = np.array([[0], [1], [2]])
Expand Down
2 changes: 1 addition & 1 deletion tests/test_clusters.py → tests/clusters/test_kmeans.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Clusters tests."""
"""K-Means tests."""

import numpy as np

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Optimizers tests."""
"""Gradient Descent Optimizers tests."""

import pytest
import numpy as np
Expand Down
33 changes: 33 additions & 0 deletions tests/preprocessors/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Data Preprocessors tests."""

import numpy as np
import pytest

from alchina.preprocessors import Normalization, Standardization


# --- Normalization ---


def test_normalization():
    """`Normalization` must bring every value into the [0, 1] range."""
    data = np.array([1, 10, 100, 5, 0.01])

    rescaled = Normalization()(data)

    assert rescaled.min() >= 0
    assert rescaled.max() <= 1


# --- Standardization ---


def test_standardization():
    """`Standardization` must yield zero mean and unit standard deviation."""
    data = np.array([1, 10, 100, 5, 0.01])

    rescaled = Standardization()(data)

    assert rescaled.mean() == pytest.approx(0)
    assert rescaled.std() == pytest.approx(1)
31 changes: 2 additions & 29 deletions tests/test_preprocessors.py → tests/preprocessors/test_pca.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,10 @@
"""Preprocessors tests."""
"""Principal Component Analysis tests."""

import numpy as np
import pytest

from alchina.exceptions import InvalidInput, NotFitted
from alchina.preprocessors import Normalization, Standardization, PCA


# --- Normalization ---


def test_normalization():
    """Test of `Normalization` class."""
    normalize = Normalization()

    X = np.array([1, 10, 100, 5, 0.01])
    X_norm = normalize(X)

    # Every normalized value must lie within the [0, 1] interval.
    assert np.all((X_norm >= 0) & (X_norm <= 1))


# --- Standardization ---


def test_standardization():
    """Test of `Standardization` class."""
    standardize = Standardization()

    X = np.array([1, 10, 100, 5, 0.01])
    X_stand = standardize(X)

    # The standardized data must have (approximately) zero mean and
    # unit standard deviation.
    assert np.mean(X_stand) == pytest.approx(0)
    assert np.std(X_stand) == pytest.approx(1)
from alchina.preprocessors import PCA


# --- PCA ---
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Regressors tests."""
"""Linear Regressors tests."""

import numpy as np
import pytest
Expand Down

0 comments on commit b2bc8f1

Please sign in to comment.