Skip to content

Commit

Permalink
readthedocs integration, matplotlib added for 10-fold cross validatio…
Browse files Browse the repository at this point in the history
…n results, tests fixed, fitness names added
  • Loading branch information
lukapecnik committed Dec 1, 2020
1 parent 7fe889a commit 1300818
Show file tree
Hide file tree
Showing 15 changed files with 117 additions and 50 deletions.
5 changes: 5 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
requirements_file: docs/requirements.txt
build:
image: latest
python:
version: 3.8
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
[![GitHub license](https://img.shields.io/github/license/lukapecnik/niaaml.svg)](https://github.com/lukapecnik/niaaml/blob/master/LICENSE)
[![Build Status](https://travis-ci.com/lukapecnik/NiaAML.svg?branch=master)](https://travis-ci.com/lukapecnik/NiaAML)
[![Coverage Status](https://coveralls.io/repos/github/lukapecnik/NiaAML/badge.svg?branch=master)](https://coveralls.io/github/lukapecnik/NiaAML?branch=master)
[![Documentation Status](https://readthedocs.org/projects/niaaml/badge/?version=latest)](https://niaaml.readthedocs.io/en/latest/?badge=latest)

![GitHub commit activity](https://img.shields.io/github/commit-activity/w/lukapecnik/niaaml.svg)
[![Average time to resolve an issue](http://isitmaintained.com/badge/resolution/lukapecnik/niaaml.svg)](http://isitmaintained.com/project/lukapecnik/niaaml "Average time to resolve an issue")
Expand Down
Binary file added docs/requirements.txt
Binary file not shown.
32 changes: 32 additions & 0 deletions examples/optimization_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Example showing how the OptimizationStats class can be used on its own.

Normally, OptimizationStats is used in the background when the Pipeline's
optimize method is called. You may also use it directly if you find any use
for it, which is what this script demonstrates.
"""
from niaaml.utilities import OptimizationStats
import numpy as np

# dummy array with expected results of classification process
y = np.array([
    'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
    'Class 2', 'Class 2', 'Class 2', 'Class 1', 'Class 1', 'Class 2',
    'Class 1', 'Class 2', 'Class 1', 'Class 1', 'Class 1', 'Class 1',
    'Class 2', 'Class 1',
])

# dummy array with predicted classes (same length as the expected one)
predicted = np.array([
    'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 2', 'Class 2',
    'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
    'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
    'Class 2', 'Class 2',
])

# let's say these are fitness scores of the 10-fold cross validation
fitness_scores = np.array([0.5, 0.55, 0.45, 0.57, 0.6, 0.47, 0.53, 0.52, 0.58, 0.44])

# instantiate OptimizationStats
# let's say the used fitness function's name is Accuracy
stats = OptimizationStats(predicted, y, fitness_scores, 'Accuracy')

# export boxplot of the 10-fold cross validation scores
stats.export_boxplot('boxplot.png')

# print user-friendly text representation
print(stats.to_string())
1 change: 1 addition & 0 deletions niaaml/fitness/accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Accuracy(FitnessFunction):
See Also:
* :class:`niaaml.fitness.FitnessFunction`
"""
Name = 'Accuracy'

def get_fitness(self, predicted, expected):
r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.
Expand Down
1 change: 1 addition & 0 deletions niaaml/fitness/cohen_kappa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class CohenKappa(FitnessFunction):
See Also:
* :class:`niaaml.fitness.FitnessFunction`
"""
Name = 'Cohen\'s Kappa'

def get_fitness(self, predicted, expected):
r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.
Expand Down
1 change: 1 addition & 0 deletions niaaml/fitness/f1.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class F1(FitnessFunction):
See Also:
* :class:`niaaml.fitness.FitnessFunction`
"""
Name = 'F-score'

def get_fitness(self, predicted, expected):
r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.
Expand Down
4 changes: 4 additions & 0 deletions niaaml/fitness/fitness_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ class FitnessFunction:
License:
MIT
Attributes:
Name (str): Name of the fitness function.
"""
Name = None

def __init__(self, **kwargs):
r"""Initialize fitness function.
Expand Down
1 change: 1 addition & 0 deletions niaaml/fitness/precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Precision(FitnessFunction):
See Also:
* :class:`niaaml.fitness.FitnessFunction`
"""
Name = 'Precision'

def get_fitness(self, predicted, expected):
r"""Return fitness value. The larger return value should represent a better fitness for the framework to work properly.
Expand Down
14 changes: 13 additions & 1 deletion niaaml/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,18 @@ def export_text(self, file_name):
with open(file_name, 'w') as f:
f.write(pipeline.to_string())

def export_boxplot(self, file_name):
    r"""Save a boxplot of the fitness function's 10-fold cross validation values to a file.

    The work is delegated to the export_boxplot method of the stored OptimizationStats object.

    Arguments:
        file_name (str): Output file name.

    See Also:
        * :func:`niaaml.utilities.OptimizationStats.export_boxplot`
    """
    best_stats = self.__best_stats
    best_stats.export_boxplot(file_name)

@staticmethod
def load(file_name):
r"""Loads Pipeline object from a file.
Expand Down Expand Up @@ -362,7 +374,7 @@ def evaluate(D, sol):
self.__parent.set_feature_transform_algorithm(feature_transform_algorithm)
self.__parent.set_classifier(classifier)
self.__parent.set_selected_features_mask(selected_features_mask)
self.__parent.set_stats(OptimizationStats(predictions, y_test, scores))
self.__parent.set_stats(OptimizationStats(predictions, y_test, scores, self.__fitness_function.Name))

return fitness
except:
Expand Down
15 changes: 14 additions & 1 deletion niaaml/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ def test_pipeline_run_works_fine(self):
s2 = set(predicted)
self.assertTrue(s2.issubset(s1))
self.assertTrue(len(s2) > 0 and len(s2) <= 2)

def test_pipeline_export_boxplot_works_fine(self):
    # Run the optimization on the bundled CSV data set before exporting.
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    data_reader = CSVDataReader(src=tests_dir + '/tests_files/dataset_header_classes.csv', has_header=True, contains_classes=True)
    features = data_reader.get_x()
    classes = data_reader.get_y()
    self.__pipeline.optimize(features, classes, 20, 40, 'ParticleSwarmAlgorithm', 'Accuracy')

    with tempfile.TemporaryDirectory() as tmp:
        expected_file = os.path.join(tmp, 'boxplot.png')

        # Exporting with and without the '.png' extension must both leave
        # exactly one file, named 'boxplot.png', in the directory.
        for requested_name in ('boxplot', 'boxplot.png'):
            self.__pipeline.export_boxplot(os.path.join(tmp, requested_name))
            self.assertTrue(os.path.exists(expected_file))
            self.assertEqual(1, len(os.listdir(tmp)))

def test_pipeline_export_works_fine(self):
with tempfile.TemporaryDirectory() as tmp:
Expand Down Expand Up @@ -81,7 +94,7 @@ def test_pipeline_setters_work_fine(self):
'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
'Class 2', 'Class 2'])
self.__pipeline.set_stats(OptimizationStats(self.__predicted, self.__y, numpy.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76])))
self.__pipeline.set_stats(OptimizationStats(self.__predicted, self.__y, numpy.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]), 'Accuracy'))

self.assertIsInstance(self.__pipeline.get_classifier(), AdaBoost)
self.assertIsInstance(self.__pipeline.get_feature_selection_algorithm(), SelectPercentile)
Expand Down
3 changes: 2 additions & 1 deletion niaaml/tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,15 @@ def setUp(self):
'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
'Class 2', 'Class 2'])

self.__stats = OptimizationStats(predicted, y, np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]))
self.__stats = OptimizationStats(predicted, y, np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]), 'Accuracy')

def test_works_fine(self):
self.assertEqual(self.__stats._accuracy, 0.5)
self.assertEqual(self.__stats._precision, 0.5199999999999999)
self.assertEqual(self.__stats._cohen_kappa, 0.0)
self.assertEqual(self.__stats._f1_score, 0.505050505050505)
self.assertTrue((np.array([0.88, 0.9, 0.91, 0.87, 0.7, 0.98, 0.95, 0.86, 0.88, 0.76]) == self.__stats._fitness_function_values).all())
self.assertEqual(self.__stats._fitness_function_name, 'Accuracy')

def test_export_works_fine(self):
with tempfile.TemporaryDirectory() as tmp:
Expand Down
19 changes: 13 additions & 6 deletions niaaml/utilities.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, precision_score, cohen_kappa_score, f1_score
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt

__all__ = [
'MinMax',
Expand Down Expand Up @@ -146,21 +146,25 @@ class OptimizationStats:
_precision (float): Calculated precision.
_cohen_kappa (float): Calculated Cohen's kappa.
_f1_score (float): Calculated F1-score.
_fitness_function_values (numpy.array[float]): Array of fitness function's values in the evaluation process (10-fold cross validation's results).
_fitness_function_name (str): Name of the used fitness function.
"""

def __init__(self, predicted, expected, fitness_function_values, **kwargs):
def __init__(self, predicted, expected, fitness_function_values, fitness_function_name, **kwargs):
r"""Initialize the factory.
Arguments:
predicted (Iterable[any]): Array of predicted classes.
expected (Iterable[any]): Array of expected classes.
fitness_function_values (numpy.array[float]): Array of fitness function's values in the evaluation process.
fitness_function_values (numpy.array[float]): Array of fitness function's values in the evaluation process (10-fold cross validation's results).
fitness_function_name (str): Name of the used fitness function.
"""
self._accuracy = accuracy_score(expected, predicted)
self._precision = precision_score(expected, predicted, average='weighted')
self._cohen_kappa = cohen_kappa_score(expected, predicted)
self._f1_score = f1_score(expected, predicted, average='weighted')
self._fitness_function_values = fitness_function_values
self._fitness_function_name = fitness_function_name

def export_boxplot(self, file_name):
    r"""Export boxplot of fitness function's values.

    The plot is titled with the fitness function's name and written as an image file.

    Arguments:
        file_name (str): Output file name. The '.png' extension is appended when the name does not already end with it.
    """
    # splitext returns '' for a name without an extension, so one comparison
    # covers both the "no extension" and the "other extension" case.
    if os.path.splitext(file_name)[1] != '.png':
        file_name = file_name + '.png'

    fig, ax = plt.subplots()
    try:
        ax.set_title(self._fitness_function_name)
        ax.boxplot(self._fitness_function_values)
        # A single box needs no x-axis ticks or labels.
        ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        # Save through the figure object itself instead of pyplot's implicit
        # "current figure", which may be a different figure.
        fig.savefig(file_name)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # release it so repeated exports do not accumulate open figures.
        plt.close(fig)

def to_string(self):
r"""User friendly representation of the object.
Returns:
str: User friendly representation of the object.
"""
return 'Accuracy: {acc},\nPrecision: {prc},\nCohen\'s kappa: {ck},\nF1-score: {f1}'.format(acc=self._accuracy, prc=self._precision, ck=self._cohen_kappa, f1=self._f1_score)
return 'Accuracy: {acc},\nPrecision: {prc},\nCohen\'s kappa: {ck},\nF1-score: {f1},\n\nFitness function\'s ({fn}) 10-fold cross validation results: {arr}'.format(acc=self._accuracy, prc=self._precision, ck=self._cohen_kappa, f1=self._f1_score, fn=self._fitness_function_name, arr=np.array2string(self._fitness_function_values, separator=', '))
68 changes: 28 additions & 40 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ python = "^3.8"
numpy = "^1.19.1"
scikit-learn = "^0.23.2"
NiaPy = "^2.0.0rc11"
seaborn = "^0.11.0"
matplotlib = "^3.3.3"

[tool.poetry.dev-dependencies]
sphinx = "^3.3.1"
Expand Down

0 comments on commit 1300818

Please sign in to comment.