examples' descriptions added, test fixes, seaborn initial
lukapecnik committed Nov 30, 2020
1 parent 992ce86 commit 7fe889a
Showing 19 changed files with 142 additions and 13 deletions.
5 changes: 5 additions & 0 deletions examples/classifier.py
@@ -3,6 +3,11 @@
from niaaml.data import CSVDataReader
import numpy

"""
In this example, we show how to individually use an implemented classifier and its methods. In this case we use AdaBoost for demonstration, but
you can use any of the implemented classifiers in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)

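The hunk above shows only the data-reader setup. A minimal sketch of how the rest of this example presumably drives the classifier, assuming the classifier interface exposes set_parameters, fit and predict and the data reader exposes get_x/get_y (these names are assumptions, not shown in this hunk):

from niaaml.classifiers import AdaBoost

# hedged sketch: method and parameter names are assumed, not taken from this diff
classifier = AdaBoost()
classifier.set_parameters(n_estimators=50)                # optional hyperparameters (name assumed)
classifier.fit(data_reader.get_x(), data_reader.get_y())  # train on the CSV data loaded above
predictions = classifier.predict(data_reader.get_x())     # predict class labels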
4 changes: 4 additions & 0 deletions examples/export_pipeline_object.py
@@ -3,6 +3,10 @@
from niaaml.preprocessing.feature_selection import SelectKBest
from niaaml.preprocessing.feature_transform import Normalizer

"""
In this example, we show how to export a pipeline object into a file that can later be loaded back into a Python program as a Pipeline object.
"""

# instantiate a Pipeline object with AdaBoost classifier, SelectKBest feature selection algorithm and Normalizer as feature transformation algorithm
pipeline = Pipeline(
feature_selection_algorithm=SelectKBest(),
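A hedged sketch of how the export step presumably completes this example; the export method name is an assumption based on the example's description, while the .ppln extension matches the file loaded in load_pipeline_object_file.py below:

# save the Pipeline object so it can later be restored with Pipeline.load (method name assumed)
pipeline.export('exported_pipeline.ppln')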
5 changes: 5 additions & 0 deletions examples/export_pipeline_text.py
@@ -3,6 +3,11 @@
from niaaml.preprocessing.feature_selection import SelectKBest
from niaaml.preprocessing.feature_transform import Normalizer

"""
In this example, we show how to export a pipeline object into a text file in a user-friendly form. The text file cannot be loaded back into a Python program
as a Pipeline object.
"""

# instantiate a Pipeline object with AdaBoost classifier, SelectKBest feature selection algorithm and Normalizer as feature transformation algorithm
pipeline = Pipeline(
feature_selection_algorithm=SelectKBest(),
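Correspondingly, a sketch of the text export this example presumably ends with (the method name is an assumption):

# write a human-readable description of the pipeline; this text file cannot be loaded back as a Pipeline object
pipeline.export_text('exported_pipeline.txt')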
5 changes: 5 additions & 0 deletions examples/factories.py
@@ -3,6 +3,11 @@
from niaaml.preprocessing.feature_transform import FeatureTransformAlgorithmFactory
from niaaml.fitness import FitnessFactory

"""
In this example, we show how to use all of the implemented factories to create new object instances using their class names. You may also
import and instantiate objects directly, but it is more convenient to use factories in some cases.
"""

# instantiate all possible factories
classifier_factory = ClassifierFactory()
fsa_factory = FeatureSelectionAlgorithmFactory()
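A short sketch of how the factories instantiated above are presumably used, assuming they expose a get_result(class_name) style method (the method name is an assumption, not shown in this hunk):

# create new instances from class names instead of importing the classes directly
classifier = classifier_factory.get_result('AdaBoost')
feature_selector = fsa_factory.get_result('SelectKBest')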
5 changes: 5 additions & 0 deletions examples/feature_selection.py
@@ -3,6 +3,11 @@
from niaaml.data import CSVDataReader
from sklearn.feature_selection import chi2

"""
In this example, we show how to individually use an implemented feature selection algorithm and its methods. In this case we use SelectKBest for demonstration, but
you can use any of the implemented feature selection algorithms in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)

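A minimal sketch of how the selection step presumably continues, assuming the feature selection interface exposes a select_features(x, y) method and the data reader exposes get_x/get_y (assumed names); the chi2 import above suggests a configurable scoring function, but the exact parameter name is not shown here:

from niaaml.preprocessing.feature_selection import SelectKBest

selector = SelectKBest()
# returns a mask of the selected features (assumed return value)
features_mask = selector.select_features(data_reader.get_x(), data_reader.get_y())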
5 changes: 5 additions & 0 deletions examples/feature_transform.py
@@ -2,6 +2,11 @@
import os
from niaaml.data import CSVDataReader

"""
In this example, we show how to individually use an implemented feature transform algorithm and its methods. In this case we use Normalizer for demonstration, but
you can use any of the implemented feature transform algorithms in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)

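A minimal sketch of the transform step, assuming a fit/transform interface on the feature transform algorithms and get_x on the data reader (assumed names):

from niaaml.preprocessing.feature_transform import Normalizer

transformer = Normalizer()
transformer.fit(data_reader.get_x())                       # fit the transformer to the features
transformed = transformer.transform(data_reader.get_x())   # apply the transformation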
5 changes: 5 additions & 0 deletions examples/fitness.py
@@ -3,6 +3,11 @@
import os
import numpy

"""
In this example, we show how to individually use an implemented fitness function and its method. In this case we use Precision for demonstration, but
you can use any of the implemented fitness functions in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)

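A sketch of how the fitness value is presumably computed at the end of this example; the Precision class comes from the example's description, while the get_fitness(predicted, expected) method name is an assumption:

from niaaml.fitness import Precision
import numpy

# dummy class labels just to illustrate the call (values are made up)
predicted = numpy.array(['Class 1', 'Class 1', 'Class 2', 'Class 2'])
expected = numpy.array(['Class 1', 'Class 2', 'Class 2', 'Class 2'])

fitness_function = Precision()
precision = fitness_function.get_fitness(predicted, expected)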
5 changes: 5 additions & 0 deletions examples/load_data_basic.py
@@ -1,6 +1,11 @@
from niaaml.data import BasicDataReader
import numpy

"""
In this example, we show how to instantiate BasicDataReader and use its methods. You can use it to store data in a single variable
or as an input to an instance of the PipelineOptimizer class.
"""

# a BasicDataReader instance takes arrays as input (x and y arrays)
data_reader = BasicDataReader(
x=numpy.random.uniform(low=0.0, high=15.0, size=(50, 3)),
5 changes: 5 additions & 0 deletions examples/load_data_csv.py
@@ -1,6 +1,11 @@
import os
from niaaml.data import CSVDataReader

"""
In this example, we show how to instantiate CSVDataReader and use its methods. You can use it to store data in a single variable
or as an input to an instance of the PipelineOptimizer class.
"""

# CSVDataReader gets a path to a CSV file as input, then reads and parses it into the x and y arrays
# the has_header and contains_classes arguments need to be set according to the input CSV file's structure
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)
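Once constructed, both data readers are presumably queried the same way; the get_x/get_y accessor names are assumptions based on how the other examples consume a data reader:

x = data_reader.get_x()   # feature matrix parsed from the CSV file
y = data_reader.get_y()   # class labels (only available when contains_classes=True)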
4 changes: 4 additions & 0 deletions examples/load_pipeline_object_file.py
@@ -1,6 +1,10 @@
import os
from niaaml import Pipeline

"""
In this example, we show how to load a saved Pipeline object from a file. You can use all of its methods after it's been successfully loaded.
"""

# load Pipeline object from a file
pipeline = Pipeline.load(os.path.dirname(os.path.abspath(__file__)) + '/example_files/pipeline.ppln')

4 changes: 4 additions & 0 deletions examples/optimize_run_pipeline.py
@@ -6,6 +6,10 @@
import os
import numpy

"""
In this example, we show how to individually use the Pipeline class. You may use this if you want to test out a specific classification pipeline.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)

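The rest of this example presumably instantiates a Pipeline and calls its optimize method, whose signature is documented in this commit's niaaml/pipeline.py hunk further down; the chosen component classes, the AdaBoost import and all numeric/string argument values below are illustrative assumptions:

from niaaml import Pipeline
from niaaml.classifiers import AdaBoost
from niaaml.preprocessing.feature_selection import SelectKBest
from niaaml.preprocessing.feature_transform import Normalizer

pipeline = Pipeline(
    feature_selection_algorithm=SelectKBest(),
    feature_transform_algorithm=Normalizer(),
    classifier=AdaBoost()
)

# optimize(x, y, population_size, number_of_evaluations, optimization_algorithm, fitness_function)
fitness = pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 20, 400, 'ParticleSwarmAlgorithm', 'Accuracy')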
5 changes: 5 additions & 0 deletions examples/run_pipeline_optimizer_array_data.py
@@ -2,6 +2,11 @@
from niaaml.data import BasicDataReader
import numpy

"""
In this example, we show how to use the PipelineOptimizer class. This example uses an instance of BasicDataReader.
The instantiated PipelineOptimizer will try to assemble the best pipeline from the components specified in its constructor.
"""

# prepare data reader using features and classes from arrays
# in this case random dummy arrays are generated
data_reader = BasicDataReader(
5 changes: 5 additions & 0 deletions examples/run_pipeline_optimizer_csv_data.py
@@ -2,6 +2,11 @@
from niaaml import PipelineOptimizer, Pipeline
from niaaml.data import CSVDataReader

"""
In this example, we show how to use the PipelineOptimizer class. This example uses an instance of CSVDataReader.
The instantiated PipelineOptimizer will try to assemble the best pipeline from the components specified in its constructor.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv', has_header=False, contains_classes=True)

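A hedged sketch of how the optimizer itself is presumably configured and run; the constructor keywords, the run argument order and all component/algorithm names below are assumptions based on the example's description rather than this hunk:

pipeline_optimizer = PipelineOptimizer(
    data=data_reader,
    classifiers=['AdaBoost'],
    feature_selection_algorithms=['SelectKBest', 'SelectPercentile'],
    feature_transform_algorithms=['Normalizer']
)

# returns the best Pipeline assembled from the components above (argument order assumed)
pipeline = pipeline_optimizer.run('Accuracy', 20, 20, 400, 400, 'ParticleSwarmAlgorithm', 'ParticleSwarmAlgorithm')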
14 changes: 13 additions & 1 deletion niaaml/pipeline.py
@@ -127,6 +127,12 @@ def optimize(self, x, y, population_size, number_of_evaluations, optimization_al
optimization_algorithm (str): Name of the optimization algorithm to use.
fitness_function (str): Name of the fitness function to use.
Notes:
Stratified K-Fold Cross Validation in our optimization process splits the input dataset 11 times, but we are effectively
running a stratified 10-fold cross validation, since the first iteration is only used to fit the
feature selection and feature transform algorithms. This way the evaluation is faster with no difference in
quality.
Returns:
float: Best fitness value found in optimization process.
"""
@@ -277,6 +283,12 @@ def evaluate(D, sol):
D (uint): Number of dimensions.
sol (numpy.ndarray[float]): Individual of the population / possible solution.
Notes:
Stratified K-Fold Cross Validation in our optimization process splits the input dataset 11 times, but we are effectively
running a stratified 10-fold cross validation, since the first iteration is only used to fit the
feature selection and feature transform algorithms. This way the evaluation is faster with no difference in
quality.
Returns:
float: Fitness.
"""
@@ -350,7 +362,7 @@ def evaluate(D, sol):
self.__parent.set_feature_transform_algorithm(feature_transform_algorithm)
self.__parent.set_classifier(classifier)
self.__parent.set_selected_features_mask(selected_features_mask)
self.__parent.set_stats(OptimizationStats(predictions, y_test))
self.__parent.set_stats(OptimizationStats(predictions, y_test, scores))

return fitness
except:
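For readers unfamiliar with the evaluation scheme described in the Notes added above, a rough, self-contained illustration of the pattern using plain scikit-learn (an analogy with dummy data, not the library's actual implementation):

from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import AdaBoostClassifier
import numpy as np

# dummy, balanced data just to illustrate the splitting pattern
x = np.random.uniform(size=(50, 3))
y = np.array(['Class 1', 'Class 2'] * 25)

scores = []
skf = StratifiedKFold(n_splits=11)
for i, (train_index, test_index) in enumerate(skf.split(x, y)):
    if i == 0:
        # the first split would only be used to fit feature selection / feature transform algorithms
        continue
    # the remaining 10 splits behave like a stratified 10-fold cross validation
    clf = AdaBoostClassifier().fit(x[train_index], y[train_index])
    scores.append(clf.score(x[test_index], y[test_index]))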
2 changes: 1 addition & 1 deletion niaaml/tests/test_pipeline.py
@@ -81,7 +81,7 @@ def test_pipeline_setters_work_fine(self):
'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
'Class 2', 'Class 2'])
self.__pipeline.set_stats(OptimizationStats(self.__predicted, self.__y))
self.__pipeline.set_stats(OptimizationStats(self.__predicted, self.__y, numpy.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76])))

self.assertIsInstance(self.__pipeline.get_classifier(), AdaBoost)
self.assertIsInstance(self.__pipeline.get_feature_selection_algorithm(), SelectPercentile)
31 changes: 23 additions & 8 deletions niaaml/tests/test_utilities.py
@@ -1,6 +1,8 @@
from unittest import TestCase
from niaaml import ParameterDefinition, MinMax, OptimizationStats, get_bin_index
import numpy as np
import tempfile
import os

class UtilitiesTestCase(TestCase):
def test_get_bin_index_works_fine(self):
@@ -21,21 +23,34 @@ def test_works_fine(self):
self.assertEqual(parameter_definition.param_type, float)

class OptimizationStatsTestCase(TestCase):
def test_works_fine(self):
self.__y = np.array(['Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
def setUp(self):
y = np.array(['Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
'Class 2', 'Class 2', 'Class 2', 'Class 1', 'Class 1', 'Class 2',
'Class 1', 'Class 2', 'Class 1', 'Class 1', 'Class 1', 'Class 1',
'Class 2', 'Class 1'])
self.__predicted = np.array(['Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 2', 'Class 2',
predicted = np.array(['Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 2', 'Class 2',
'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
'Class 2', 'Class 2'])

stats = OptimizationStats(self.__predicted, self.__y)
self.assertEqual(stats._accuracy, 0.5)
self.assertEqual(stats._precision, 0.5199999999999999)
self.assertEqual(stats._cohen_kappa, 0.0)
self.assertEqual(stats._f1_score, 0.505050505050505)
self.__stats = OptimizationStats(predicted, y, np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]))

def test_works_fine(self):
self.assertEqual(self.__stats._accuracy, 0.5)
self.assertEqual(self.__stats._precision, 0.5199999999999999)
self.assertEqual(self.__stats._cohen_kappa, 0.0)
self.assertEqual(self.__stats._f1_score, 0.505050505050505)
self.assertTrue((np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]) == self.__stats._fitness_function_values).all())

def test_export_works_fine(self):
with tempfile.TemporaryDirectory() as tmp:
self.__stats.export_boxplot(os.path.join(tmp, 'boxplot'))
self.assertTrue(os.path.exists(os.path.join(tmp, 'boxplot.png')))
self.assertEqual(1, len([name for name in os.listdir(tmp)]))

self.__stats.export_boxplot(os.path.join(tmp, 'boxplot.png'))
self.assertTrue(os.path.exists(os.path.join(tmp, 'boxplot.png')))
self.assertEqual(1, len([name for name in os.listdir(tmp)]))

class MinMaxTestCase(TestCase):
def test_works_fine(self):
25 changes: 23 additions & 2 deletions niaaml/utilities.py
@@ -1,6 +1,8 @@
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, precision_score, cohen_kappa_score, f1_score
import numpy as np
import seaborn as sns
import os

__all__ = [
'MinMax',
@@ -146,12 +148,31 @@ class OptimizationStats:
_f1_score (float): Calculated F1-score.
"""

def __init__(self, predicted, expected, **kwargs):
r"""Initialize the factory."""
def __init__(self, predicted, expected, fitness_function_values, **kwargs):
r"""Initialize the optimization stats.
Arguments:
predicted (Iterable[any]): Array of predicted classes.
expected (Iterable[any]): Array of expected classes.
fitness_function_values (numpy.array[float]): Array of fitness function's values in the evaluation process.
"""
self._accuracy = accuracy_score(expected, predicted)
self._precision = precision_score(expected, predicted, average='weighted')
self._cohen_kappa = cohen_kappa_score(expected, predicted)
self._f1_score = f1_score(expected, predicted, average='weighted')
self._fitness_function_values = fitness_function_values

def export_boxplot(self, file_name):
r"""Export boxplot of fitness function's values.
Arguments:
file_name (str): Output file name.
"""
if len(os.path.splitext(file_name)[1]) == 0 or os.path.splitext(file_name)[1] != '.png':
file_name = file_name + '.png'

boxplot = sns.boxplot(data=[self._fitness_function_values])
boxplot.figure.savefig(file_name)

def to_string(self):
r"""User friendly representation of the object.
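Taken together with the constructor change above, the new boxplot export can be exercised roughly like this; the values are made up, and the calls mirror the signatures shown in this hunk:

import numpy as np
from niaaml import OptimizationStats

predicted = np.array(['Class 1', 'Class 2', 'Class 2', 'Class 1'])
expected = np.array(['Class 1', 'Class 2', 'Class 1', 'Class 1'])
fitness_values = np.array([0.88, 0.9, 0.91, 0.87, 0.7])

stats = OptimizationStats(predicted, expected, fitness_values)
stats.export_boxplot('fitness_boxplot')   # '.png' is appended automatically when missing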
20 changes: 19 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -18,6 +18,7 @@ python = "^3.8"
numpy = "^1.19.1"
scikit-learn = "^0.23.2"
NiaPy = "^2.0.0rc11"
seaborn = "^0.11.0"

[tool.poetry.dev-dependencies]
sphinx = "^3.3.1"
