diff --git a/README.md b/README.md index e17bf57..39f95e9 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Alchina is a Machine Learning framework. **Clusters** -- K-Means clustering +- K-Means **Optimizers** diff --git a/alchina/classifiers/__init__.py b/alchina/classifiers/__init__.py new file mode 100644 index 0000000..636d3bc --- /dev/null +++ b/alchina/classifiers/__init__.py @@ -0,0 +1,6 @@ +"""Classifiers.""" + +from .linear import LinearClassifier, RidgeClassifier +from .knn import KNNClassifier + +__all__ = ["LinearClassifier", "RidgeClassifier", "KNNClassifier"] diff --git a/alchina/classifiers.py b/alchina/classifiers/linear.py similarity index 89% rename from alchina/classifiers.py rename to alchina/classifiers/linear.py index dcef451..7ed9b76 100644 --- a/alchina/classifiers.py +++ b/alchina/classifiers/linear.py @@ -1,18 +1,18 @@ -"""Classifiers.""" +"""Linear Classifiers.""" import numpy as np from abc import ABC, abstractmethod -from .exceptions import InvalidInput, NotFitted -from .metrics import accuracy_score -from .optimizers import GradientDescent -from .preprocessors import Standardization -from .utils import check_dataset_consistency, features_reshape +from alchina.exceptions import InvalidInput, NotFitted +from alchina.metrics import accuracy_score +from alchina.optimizers import GradientDescent +from alchina.preprocessors import Standardization +from alchina.utils import check_dataset_consistency, features_reshape -class AbstractClassifier(ABC): - """Abstract class for classifiers algorithms.""" +class AbstractLinearClassifier(ABC): + """Abstract class for linear classifiers algorithms.""" def __init__(self, *args, optimizer=None, standardize: bool = True, **kwargs): self.standardize = Standardization() if standardize else None @@ -88,7 +88,7 @@ def score(self, X, y): return accuracy_score(self.predict(X), y) -class LinearClassifier(AbstractClassifier): +class LinearClassifier(AbstractLinearClassifier): """Linear classifier (logistic regressor).""" def sigmoid(self, z): @@ -111,7 +111,7 @@ def gradient(self, X, y, theta): return X.T.dot(self.hypothesis(X, theta) - y) -class RidgeClassifier(LinearClassifier): +class RidgeClassifier(AbstractLinearClassifier): """Regularized linear classifier.""" def __init__(self, *args, regularization: float = 1, **kwargs): diff --git a/alchina/clusters/__init__.py b/alchina/clusters/__init__.py new file mode 100644 index 0000000..d9ce662 --- /dev/null +++ b/alchina/clusters/__init__.py @@ -0,0 +1,5 @@ +"""Clusters.""" + +from .kmeans import KMeans + +__all__ = ["KMeans"] diff --git a/alchina/clusters.py b/alchina/clusters/kmeans.py similarity index 97% rename from alchina/clusters.py rename to alchina/clusters/kmeans.py index d10784f..5a4c073 100644 --- a/alchina/clusters.py +++ b/alchina/clusters/kmeans.py @@ -1,10 +1,10 @@ -"""Clusters.""" +"""K-Means.""" import numpy as np class KMeans(object): - """K-means algorithm.""" + """K-Means algorithm.""" def __init__(self, n_centroids, max_iterations=300): self.n_centroids = n_centroids diff --git a/alchina/optimizers/__init__.py b/alchina/optimizers/__init__.py new file mode 100644 index 0000000..6942544 --- /dev/null +++ b/alchina/optimizers/__init__.py @@ -0,0 +1,5 @@ +"""Optimizers.""" + +from .gradient_descent import GradientDescent, SGD, MBGD + +__all__ = ["GradientDescent", "SGD", "MBGD"] diff --git a/alchina/optimizers.py b/alchina/optimizers/gradient_descent.py similarity index 93% rename from alchina/optimizers.py rename to alchina/optimizers/gradient_descent.py index 5a24c79..49fea77 100644 --- a/alchina/optimizers.py +++ b/alchina/optimizers/gradient_descent.py @@ -1,15 +1,15 @@ -"""Optimizers algorithms.""" +"""Gradient Descent Optimizers algorithms.""" import numpy as np from abc import ABC, abstractmethod from typing import Optional -from .exceptions import InvalidInput, NotBuilt -from .utils import check_dataset_consistency, shuffle_dataset +from alchina.exceptions import InvalidInput, NotBuilt +from alchina.utils import check_dataset_consistency, shuffle_dataset -class AbstractOptimizer(ABC): +class AbstractGDOptimizer(ABC): """Abstract class for optimizers algorithms.""" def __init__(self, iterations: int = 100, history: bool = False): @@ -32,7 +32,7 @@ def __call__(self): pass # pragma: no cover -class GradientDescent(AbstractOptimizer): +class GradientDescent(AbstractGDOptimizer): """Batch gradient descent.""" def __init__(self, *args, learning_rate: float = 0.01, **kwargs): @@ -60,7 +60,7 @@ def __call__(self, X, y, *args, **kwargs): return self.parameters -class SGD(AbstractOptimizer): +class SGD(AbstractGDOptimizer): """Stochastic gradient descent.""" def __init__( @@ -92,7 +92,7 @@ def __call__(self, X, y, *args, **kwargs): return self.parameters -class MBGD(AbstractOptimizer): +class MBGD(AbstractGDOptimizer): """Mini-batch gradient descent.""" def __init__( diff --git a/alchina/preprocessors/__init__.py b/alchina/preprocessors/__init__.py new file mode 100644 index 0000000..8d53785 --- /dev/null +++ b/alchina/preprocessors/__init__.py @@ -0,0 +1,6 @@ +"""Preprocessors.""" + +from .data import Normalization, Standardization +from .pca import PCA + +__all__ = ["Normalization", "Standardization", "PCA"] diff --git a/alchina/preprocessors/data.py b/alchina/preprocessors/data.py new file mode 100644 index 0000000..81da89b --- /dev/null +++ b/alchina/preprocessors/data.py @@ -0,0 +1,41 @@ +"""Data Preprocessors.""" + +import numpy as np + +from typing import Optional + + +class Normalization(object): + """Rescale the data via a normalization. + + Produce: + - Bring all values into the range [0, 1] + """ + + def __call__(self, X, axis: int = 0): + min_x = np.amin(X, axis=axis) + max_x = np.amax(X, axis=axis) + return (X - min_x) / (max_x - min_x) + + +class Standardization(object): + """Rescale the data via a standardization + + Produce: + - mean(Xstandardized) = 0 + - std(Xstandardized) = 1 + """ + + def __init__(self, mu: Optional[int] = None, sigma: Optional[int] = None): + self.mu = mu + self.sigma = sigma + + def __call__(self, X, axis: int = 0): + if self.mu is None or self.sigma is None: + self.mu = np.mean(X, axis=axis) + self.sigma = np.std(X, axis=axis) + + if not np.any(self.sigma): + self.sigma = np.ones_like(self.sigma) + + return np.divide(X - self.mu, self.sigma) diff --git a/alchina/preprocessors.py b/alchina/preprocessors/pca.py similarity index 64% rename from alchina/preprocessors.py rename to alchina/preprocessors/pca.py index c4995a6..3f09e4f 100644 --- a/alchina/preprocessors.py +++ b/alchina/preprocessors/pca.py @@ -1,51 +1,15 @@ -"""Preprocessors.""" +"""Principal Component Analysis.""" import numpy as np from typing import Optional -from .exceptions import InvalidInput, NotFitted -from .utils import features_reshape - - -class Normalization(object): - """Rescale the data via a normalization. - - Produce: - - Bring all values into the range [0, 1] - """ - - def __call__(self, X, axis: int = 0): - min_x = np.amin(X, axis=axis) - max_x = np.amax(X, axis=axis) - return (X - min_x) / (max_x - min_x) - - -class Standardization(object): - """Rescale the data via a standardization - - Produce: - - mean(Xstandardized) = 0 - - std(Xstandardized) = 1 - """ - - def __init__(self, mu: Optional[int] = None, sigma: Optional[int] = None): - self.mu = mu - self.sigma = sigma - - def __call__(self, X, axis: int = 0): - if self.mu is None or self.sigma is None: - self.mu = np.mean(X, axis=axis) - self.sigma = np.std(X, axis=axis) - - if not np.any(self.sigma): - self.sigma = np.ones_like(self.sigma) - - return np.divide(X - self.mu, self.sigma) +from alchina.exceptions import InvalidInput, NotFitted +from alchina.utils import features_reshape class PCA(object): - """Principal Component Analysis.""" + """Principal Component Analysis algorithm.""" def __init__(self, n_components: Optional[int] = None): self.n_components = n_components diff --git a/alchina/regressors/__init__.py b/alchina/regressors/__init__.py new file mode 100644 index 0000000..636e8d9 --- /dev/null +++ b/alchina/regressors/__init__.py @@ -0,0 +1,5 @@ +"""Regressors.""" + +from .linear import LinearRegressor, RidgeRegressor + +__all__ = ["LinearRegressor", "RidgeRegressor"] diff --git a/alchina/regressors.py b/alchina/regressors/linear.py similarity index 90% rename from alchina/regressors.py rename to alchina/regressors/linear.py index a0b09e1..86d11d1 100644 --- a/alchina/regressors.py +++ b/alchina/regressors/linear.py @@ -4,14 +4,14 @@ from abc import ABC, abstractmethod -from .exceptions import InvalidInput, NotFitted -from .metrics import r2_score -from .optimizers import GradientDescent -from .preprocessors import Standardization -from .utils import check_dataset_consistency, features_reshape +from alchina.exceptions import InvalidInput, NotFitted +from alchina.metrics import r2_score +from alchina.optimizers import GradientDescent +from alchina.preprocessors import Standardization +from alchina.utils import check_dataset_consistency, features_reshape -class AbstractRegressor(ABC): +class AbstractLinearRegressor(ABC): """Abstract class for regressors algorithms.""" def __init__(self, *args, optimizer=None, standardize: bool = True, **kwargs): @@ -71,7 +71,7 @@ def score(self, X, y): return r2_score(self.predict(X), y) -class LinearRegressor(AbstractRegressor): +class LinearRegressor(AbstractLinearRegressor): """Linear regressor.""" def hypothesis(self, X, theta): @@ -95,7 +95,7 @@ def normal(self, X, y): self.optimizer.parameters = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y) -class RidgeRegressor(AbstractRegressor): +class RidgeRegressor(AbstractLinearRegressor): """Ridge regressor.""" def __init__(self, *args, regularization: float = 1, **kwargs): diff --git a/examples/clusters.py b/examples/clusters.py index 52ad655..82f036a 100644 --- a/examples/clusters.py +++ b/examples/clusters.py @@ -23,9 +23,6 @@ kmeans = KMeans(n_centroids=2) kmeans.fit(X) -# Display the number of model iterations -print(f"K-Means iterations : {kmeans.iterations}") - # Plot the results A = X[np.array(kmeans.indexes) == 0] B = X[np.array(kmeans.indexes) == 1] diff --git a/examples/selection.py b/examples/metrics.py similarity index 100% rename from examples/selection.py rename to examples/metrics.py diff --git a/tests/test_classifiers.py b/tests/classifiers/test_linear_classifiers.py similarity index 98% rename from tests/test_classifiers.py rename to tests/classifiers/test_linear_classifiers.py index f9471b4..7d6bea6 100644 --- a/tests/test_classifiers.py +++ b/tests/classifiers/test_linear_classifiers.py @@ -1,4 +1,4 @@ -"""Classifiers tests.""" +"""Linear Classifiers tests.""" import numpy as np import pytest @@ -75,7 +75,7 @@ def test_linear_classifier_history_disabled(): def test_linear_classifier_multiclass(): - """Test of `LinearClassifier` with no multiclass.""" + """Test of `LinearClassifier` with multiclass.""" lc = LinearClassifier(learning_rate=0.1, iterations=2) X = np.array([[0], [1], [2]]) diff --git a/tests/test_clusters.py b/tests/clusters/test_kmeans.py similarity index 95% rename from tests/test_clusters.py rename to tests/clusters/test_kmeans.py index cbf7af6..f0c1937 100644 --- a/tests/test_clusters.py +++ b/tests/clusters/test_kmeans.py @@ -1,4 +1,4 @@ -"""Clusters tests.""" +"""K-Means tests.""" import numpy as np diff --git a/tests/test_optimizers.py b/tests/optimizers/test_gradient_descent.py similarity index 98% rename from tests/test_optimizers.py rename to tests/optimizers/test_gradient_descent.py index 547e98c..3705b23 100644 --- a/tests/test_optimizers.py +++ b/tests/optimizers/test_gradient_descent.py @@ -1,4 +1,4 @@ -"""Optimizers tests.""" +"""Gradient Descent Optimizers tests.""" import pytest import numpy as np diff --git a/tests/preprocessors/test_data.py b/tests/preprocessors/test_data.py new file mode 100644 index 0000000..ed47318 --- /dev/null +++ b/tests/preprocessors/test_data.py @@ -0,0 +1,33 @@ +"""Data Preprocessors tests.""" + +import numpy as np +import pytest + +from alchina.preprocessors import Normalization, Standardization + + +# --- Normalization --- + + +def test_normalization(): + """Test of `Normalization` class.""" + normalize = Normalization() + + X = np.array([1, 10, 100, 5, 0.01]) + X_norm = normalize(X) + + assert np.all((X_norm >= 0) & (X_norm <= 1)) + + +# --- Standardization --- + + +def test_standardization(): + """Test of `Standardization` class.""" + standardize = Standardization() + + X = np.array([1, 10, 100, 5, 0.01]) + X_stand = standardize(X) + + assert np.mean(X_stand) == pytest.approx(0) + assert np.std(X_stand) == pytest.approx(1) diff --git a/tests/test_preprocessors.py b/tests/preprocessors/test_pca.py similarity index 64% rename from tests/test_preprocessors.py rename to tests/preprocessors/test_pca.py index 2d5f7df..4795a27 100644 --- a/tests/test_preprocessors.py +++ b/tests/preprocessors/test_pca.py @@ -1,37 +1,10 @@ -"""Preprocessors tests.""" +"""Principal Component Analysis tests.""" import numpy as np import pytest from alchina.exceptions import InvalidInput, NotFitted -from alchina.preprocessors import Normalization, Standardization, PCA - - -# --- Normalization --- - - -def test_normalization(): - """Test of `Normalization` class.""" - normalize = Normalization() - - X = np.array([1, 10, 100, 5, 0.01]) - X_norm = normalize(X) - - assert np.all((X_norm >= 0) & (X_norm <= 1)) - - -# --- Standardization --- - - -def test_standardization(): - """Test of `Standardization` class.""" - standardize = Standardization() - - X = np.array([1, 10, 100, 5, 0.01]) - X_stand = standardize(X) - - assert np.mean(X_stand) == pytest.approx(0) - assert np.std(X_stand) == pytest.approx(1) +from alchina.preprocessors import PCA # --- PCA --- diff --git a/tests/test_regressors.py b/tests/regressors/test_linear_regressors.py similarity index 99% rename from tests/test_regressors.py rename to tests/regressors/test_linear_regressors.py index 5a05615..41005ea 100644 --- a/tests/test_regressors.py +++ b/tests/regressors/test_linear_regressors.py @@ -1,4 +1,4 @@ -"""Regressors tests.""" +"""Linear Regressors tests.""" import numpy as np import pytest