readthedocs integration, matplotlib added for 10-fold cross validation results, tests fixed, fitness names added
firefly-cpp · Dec 1, 2020 · 1300818 · 1300818
1 parent 7fe889a
commit 1300818
Show file tree

Hide file tree

Showing 15 changed files with 117 additions and 50 deletions.
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -0,0 +1,5 @@
+requirements_file: docs/requirements.txt
+build:
+  image: latest
+python:
+  version: 3.8
diff --git a/README.md b/README.md
@@ -6,6 +6,7 @@
 [![GitHub license](https://img.shields.io/github/license/lukapecnik/niaaml.svg)](https://github.com/lukapecnik/niaaml/blob/master/LICENSE)
 [![Build Status](https://travis-ci.com/lukapecnik/NiaAML.svg?branch=master)](https://travis-ci.com/lukapecnik/NiaAML)
 [![Coverage Status](https://coveralls.io/repos/github/lukapecnik/NiaAML/badge.svg?branch=master)](https://coveralls.io/github/lukapecnik/NiaAML?branch=master)
+[![Documentation Status](https://readthedocs.org/projects/niaaml/badge/?version=latest)](https://niaaml.readthedocs.io/en/latest/?badge=latest)
 
 ![GitHub commit activity](https://img.shields.io/github/commit-activity/w/lukapecnik/niaaml.svg)
 [![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/lukapecnik/niaaml.svg)](http://isitmaintained.com/project/lukapecnik/niaaml "Average time to resolve an issue")

diff --git a/docs/requirements.txt b/docs/requirements.txt
diff --git a/examples/optimization_stats.py b/examples/optimization_stats.py
@@ -0,0 +1,32 @@
+from niaaml.utilities import OptimizationStats
+import numpy as np
+
+"""
+In this example, we show how the OptimizationStats class can be used. Normally, it is used in the background when the Pipeline's optimize method is called.
+You may also use it on its own if you find it useful.
+"""
+
+# dummy array with expected results of classification process
+y = np.array(['Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
+'Class 2', 'Class 2', 'Class 2', 'Class 1', 'Class 1', 'Class 2',
+'Class 1', 'Class 2', 'Class 1', 'Class 1', 'Class 1', 'Class 1',
+'Class 2', 'Class 1'])
+
+# dummy array with predicted classes
+predicted = np.array(['Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 2', 'Class 2',
+'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
+'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
+'Class 2', 'Class 2'])
+
+# let's say these are fitness scores of the 10-fold cross validation
+fitness_scores = np.array([0.5, 0.55, 0.45, 0.57, 0.6, 0.47, 0.53, 0.52, 0.58, 0.44])
+
+# instantiate OptimizationStats
+# let's say the used fitness function's name is Accuracy
+stats = OptimizationStats(predicted, y, fitness_scores, 'Accuracy')
+
+# export boxplot of the 10-fold cross validation scores
+stats.export_boxplot('boxplot.png')
+
+# print user-friendly text representation
+print(stats.to_string())
diff --git a/niaaml/fitness/accuracy.py b/niaaml/fitness/accuracy.py
@@ -20,6 +20,7 @@ class Accuracy(FitnessFunction):
     See Also:
         * :class:`niaaml.fitness.FitnessFunction`
     """
+    Name = 'Accuracy'
 
     def get_fitness(self, predicted, expected):
         r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

diff --git a/niaaml/fitness/cohen_kappa.py b/niaaml/fitness/cohen_kappa.py
@@ -20,6 +20,7 @@ class CohenKappa(FitnessFunction):
     See Also:
         * :class:`niaaml.fitness.FitnessFunction`
     """
+    Name = 'Cohen\'s Kappa'
 
     def get_fitness(self, predicted, expected):
         r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

diff --git a/niaaml/fitness/f1.py b/niaaml/fitness/f1.py
@@ -20,6 +20,7 @@ class F1(FitnessFunction):
     See Also:
         * :class:`niaaml.fitness.FitnessFunction`
     """
+    Name = 'F-score'
 
     def get_fitness(self, predicted, expected):
         r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

diff --git a/niaaml/fitness/fitness_function.py b/niaaml/fitness/fitness_function.py
@@ -13,7 +13,11 @@ class FitnessFunction:
 
     License:
         MIT
+
+    Attributes:
+        Name (str): Name of the fitness function.
     """
+    Name = None
 
     def __init__(self, **kwargs):
         r"""Initialize fitness function.

diff --git a/niaaml/fitness/precision.py b/niaaml/fitness/precision.py
@@ -20,6 +20,7 @@ class Precision(FitnessFunction):
     See Also:
         * :class:`niaaml.fitness.FitnessFunction`
     """
+    Name = 'Precision'
 
     def get_fitness(self, predicted, expected):
         r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.

diff --git a/niaaml/pipeline.py b/niaaml/pipeline.py
@@ -210,6 +210,18 @@ def export_text(self, file_name):
         with open(file_name, 'w') as f:
             f.write(pipeline.to_string())
 
+    def export_boxplot(self, file_name):
+        r"""Export a boxplot of the fitness function's values from the 10-fold cross-validation process.
+        Uses OptimizationStats' export_boxplot method.
+
+        Arguments:
+            file_name (str): Output file name.
+        
+        See Also:
+            * :func:`niaaml.utilities.OptimizationStats.export_boxplot`
+        """
+        self.__best_stats.export_boxplot(file_name)
+
     @staticmethod
     def load(file_name):
         r"""Loads Pipeline object from a file.
@@ -362,7 +374,7 @@ def evaluate(D, sol):
                     self.__parent.set_feature_transform_algorithm(feature_transform_algorithm)
                     self.__parent.set_classifier(classifier)
                     self.__parent.set_selected_features_mask(selected_features_mask)
-                    self.__parent.set_stats(OptimizationStats(predictions, y_test, scores))
+                    self.__parent.set_stats(OptimizationStats(predictions, y_test, scores, self.__fitness_function.Name))
 
                 return fitness
             except:

diff --git a/niaaml/tests/test_pipeline.py b/niaaml/tests/test_pipeline.py
@@ -43,6 +43,19 @@ def test_pipeline_run_works_fine(self):
         s2 = set(predicted)
         self.assertTrue(s2.issubset(s1))
         self.assertTrue(len(s2) > 0 and len(s2) <= 2)
+
+    def test_pipeline_export_boxplot_works_fine(self):
+        data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) + '/tests_files/dataset_header_classes.csv', has_header=True, contains_classes=True)
+        self.__pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 20, 40, 'ParticleSwarmAlgorithm', 'Accuracy')
+
+        with tempfile.TemporaryDirectory() as tmp:
+            self.__pipeline.export_boxplot(os.path.join(tmp, 'boxplot'))
+            self.assertTrue(os.path.exists(os.path.join(tmp, 'boxplot.png')))
+            self.assertEqual(1, len([name for name in os.listdir(tmp)]))
+
+            self.__pipeline.export_boxplot(os.path.join(tmp, 'boxplot.png'))
+            self.assertTrue(os.path.exists(os.path.join(tmp, 'boxplot.png')))
+            self.assertEqual(1, len([name for name in os.listdir(tmp)]))
 
     def test_pipeline_export_works_fine(self):
         with tempfile.TemporaryDirectory() as tmp:
@@ -81,7 +94,7 @@ def test_pipeline_setters_work_fine(self):
        'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
        'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
        'Class 2', 'Class 2'])
-        self.__pipeline.set_stats(OptimizationStats(self.__predicted, self.__y, numpy.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76])))
+        self.__pipeline.set_stats(OptimizationStats(self.__predicted, self.__y, numpy.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]), 'Accuracy'))
 
         self.assertIsInstance(self.__pipeline.get_classifier(), AdaBoost)
         self.assertIsInstance(self.__pipeline.get_feature_selection_algorithm(), SelectPercentile)

diff --git a/niaaml/tests/test_utilities.py b/niaaml/tests/test_utilities.py
@@ -33,14 +33,15 @@ def setUp(self):
        'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
        'Class 2', 'Class 2'])
 
-        self.__stats = OptimizationStats(predicted, y, np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]))
+        self.__stats = OptimizationStats(predicted, y, np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]), 'Accuracy')
 
     def test_works_fine(self):
         self.assertEqual(self.__stats._accuracy, 0.5)
         self.assertEqual(self.__stats._precision, 0.5199999999999999)
         self.assertEqual(self.__stats._cohen_kappa, 0.0)
         self.assertEqual(self.__stats._f1_score, 0.505050505050505)
         self.assertTrue((np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]) == self.__stats._fitness_function_values).all())
+        self.assertEqual(self.__stats._fitness_function_name, 'Accuracy')
 
     def test_export_works_fine(self):
         with tempfile.TemporaryDirectory() as tmp:

diff --git a/niaaml/utilities.py b/niaaml/utilities.py
@@ -1,8 +1,8 @@
 from sklearn import preprocessing
 from sklearn.metrics import accuracy_score, precision_score, cohen_kappa_score, f1_score
 import numpy as np
-import seaborn as sns
 import os
+import matplotlib.pyplot as plt
 
 __all__ = [
     'MinMax',
@@ -146,21 +146,25 @@ class OptimizationStats:
         _precision (float): Calculated precision.
         _cohen_kappa (float): Calculated Cohen's kappa.
         _f1_score (float): Calculated F1-score.
+        _fitness_function_values (numpy.array[float]): Array of the fitness function's values from the evaluation process (10-fold cross-validation results).
+        _fitness_function_name (str): Name of the used fitness function.
     """
 
-    def __init__(self, predicted, expected, fitness_function_values, **kwargs):
+    def __init__(self, predicted, expected, fitness_function_values, fitness_function_name, **kwargs):
         r"""Initialize the factory.
 
         Arguments:
             predicted (Iterable[any]): Array of predicted classes.
             expected (Iterable[any]): Array of expected classes.
-            fitness_function_values (numpy.array[float]): Array of fitness function's values in the evaluation process.
+            fitness_function_values (numpy.array[float]): Array of the fitness function's values from the evaluation process (10-fold cross-validation results).
+            fitness_function_name (str): Name of the used fitness function.
         """
         self._accuracy = accuracy_score(expected, predicted)
         self._precision = precision_score(expected, predicted, average='weighted')
         self._cohen_kappa = cohen_kappa_score(expected, predicted)
         self._f1_score = f1_score(expected, predicted, average='weighted')
         self._fitness_function_values = fitness_function_values
+        self._fitness_function_name = fitness_function_name
 
     def export_boxplot(self, file_name):
         r"""Export boxplot of fitness function's values.
@@ -171,13 +175,16 @@ def export_boxplot(self, file_name):
         if len(os.path.splitext(file_name)[1]) == 0 or os.path.splitext(file_name)[1] != '.png':
             file_name = file_name + '.png'
 
-        boxplot = sns.boxplot(data=[self._fitness_function_values])
-        boxplot.figure.savefig(file_name)
+        fig, ax = plt.subplots()
+        ax.set_title(self._fitness_function_name)
+        ax.boxplot(self._fitness_function_values)
+        ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
+        plt.savefig(file_name)
 
     def to_string(self):
         r"""User friendly representation of the object.
 
         Returns:
             str: User friendly representation of the object.
         """
-        return 'Accuracy: {acc},\nPrecision: {prc},\nCohen\'s kappa: {ck},\nF1-score: {f1}'.format(acc=self._accuracy, prc=self._precision, ck=self._cohen_kappa, f1=self._f1_score)
+        return 'Accuracy: {acc},\nPrecision: {prc},\nCohen\'s kappa: {ck},\nF1-score: {f1},\n\nFitness function\'s ({fn}) 10-fold cross validation results: {arr}'.format(acc=self._accuracy, prc=self._precision, ck=self._cohen_kappa, f1=self._f1_score, fn=self._fitness_function_name, arr=np.array2string(self._fitness_function_values, separator=', '))
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,7 +18,7 @@ python = "^3.8"
 numpy = "^1.19.1"
 scikit-learn = "^0.23.2"
 NiaPy = "^2.0.0rc11"
-seaborn = "^0.11.0"
+matplotlib = "^3.3.3"
 
 [tool.poetry.dev-dependencies]
 sphinx = "^3.3.1"