Skip to content

Commit

Permalink
Merge pull request #3 from rodrigo-arenas/0.3.X
Browse files Browse the repository at this point in the history
0.3.0
  • Loading branch information
rodrigo-arenas committed May 28, 2021
2 parents 21b3745 + 793b121 commit afe563d
Show file tree
Hide file tree
Showing 16 changed files with 572 additions and 127 deletions.
28 changes: 15 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)](https://www.python.org/downloads/)

# Sklearn-genetic-opt
scikit-learn models hyperparameters tuning using evolutionary algorithms.
scikit-learn models hyperparameters tuning, using evolutionary algorithms.

This is meant to be an alternative to popular methods inside scikit-learn such as Grid Search and Random Grid Search.

Expand All @@ -24,33 +24,38 @@ pip install sklearn-genetic-opt
```python
from sklearn_genetic import GASearchCV
from sklearn_genetic.utils import plot_fitness_evolution
from sklearn.tree import DecisionTreeClassifier
from sklearn_genetic.space import Continuous, Categorical, Integer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


data = load_digits()
n_samples = len(data.images)
X = data.images.reshape((n_samples, -1))
y = data['target']
X = data['data']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

clf = DecisionTreeClassifier()
clf = RandomForestClassifier()

param_grid = {'min_weight_fraction_leaf': Continuous(0.01, 0.5, distribution='log-uniform'),
'bootstrap': Categorical([True, False]),
'max_depth': Integer(2, 30),
'max_leaf_nodes': Integer(2, 35),
'n_estimators': Integer(100, 300)}

evolved_estimator = GASearchCV(estimator=clf,
cv=3,
scoring='accuracy',
population_size=25,
generations=35,
population_size=10,
generations=25,
tournament_size=3,
elitism=True,
crossover_probability=0.8,
mutation_probability=0.1,
continuous_parameters={'min_weight_fraction_leaf': (0, 0.5)},
categorical_parameters={'criterion': ['gini', 'entropy']},
integer_parameters={'max_depth': (2, 25), 'max_leaf_nodes': (2, 35)},
param_grid=param_grid,
criteria='max',
algorithm='eaMuPlusLambda',
n_jobs=-1,
Expand All @@ -74,6 +79,3 @@ print("Stats achieved in each generation: ", evolved_estimator.history)
print("Parameters and cv scores in each iteration: ", evolved_estimator.logbook)
print("Best k solutions: ", evolved_estimator.hof)
```
### Result

![demo](./demo/geneticopt.gif)
14 changes: 9 additions & 5 deletions demo/Boson_Houses_decision_tree.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import matplotlib.pyplot as plt
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.utils import plot_fitness_evolution
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt


data = load_boston()
Expand All @@ -16,6 +17,11 @@

clf = DecisionTreeRegressor()

param_grid = {'ccp_alpha': Continuous(0, 1),
'criterion': Categorical(['mse', 'mae']),
'max_depth': Integer(2, 20),
'min_samples_split': Integer(2, 30)}

evolved_estimator = GASearchCV(clf,
cv=3,
scoring='r2',
Expand All @@ -26,11 +32,9 @@
keep_top_k=4,
crossover_probability=0.9,
mutation_probability=0.05,
continuous_parameters={'ccp_alpha': (0, 1)},
categorical_parameters={'criterion': ['mse', 'mae']},
integer_parameters={'max_depth': (2, 20), 'min_samples_split': (2, 30)},
param_grid=param_grid,
criteria='max',
algorithm='eaMuPlusLambda',
algorithm='eaMuCommaLambda',
n_jobs=-1)

evolved_estimator.fit(X_train, y_train)
Expand Down
16 changes: 10 additions & 6 deletions demo/Demo_Digits_Dataset.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import scipy.stats as stats
import numpy as np
import warnings
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Continuous, Categorical
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
import scipy.stats as stats
from sklearn.utils.fixes import loguniform
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
import numpy as np
import warnings


warnings.filterwarnings("ignore")

Expand Down Expand Up @@ -50,16 +52,18 @@

# 3. Genetic Algorithm

param_grid = {'l1_ratio': Continuous(0, 1),
'alpha': Continuous(1e-4, 1, distribution='log-uniform'),
'average': Categorical([True, False])}

evolved_estimator = GASearchCV(clf,
cv=3,
scoring='accuracy',
population_size=12,
generations=8,
tournament_size=3,
elitism=True,
continuous_parameters={'l1_ratio': (0, 1), 'alpha': (1e-4, 1)},
categorical_parameters={'average': [True, False]},
integer_parameters={},
param_grid=param_grid,
n_jobs=-1)

evolved_estimator.fit(X_train, y_train)
Expand Down
13 changes: 9 additions & 4 deletions demo/Digits_decision_tree.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import warnings
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer, Continuous
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
import warnings


warnings.filterwarnings("ignore")

Expand All @@ -16,6 +18,10 @@

clf = DecisionTreeClassifier()

params_grid = {'min_weight_fraction_leaf': Continuous(0, 0.5),
'criterion': Categorical(['gini', 'entropy']),
'max_depth': Integer(2, 20), 'max_leaf_nodes': Integer(2, 30)}

evolved_estimator = GASearchCV(clf,
cv=3,
scoring='accuracy',
Expand All @@ -25,9 +31,8 @@
elitism=True,
crossover_probability=0.9,
mutation_probability=0.05,
continuous_parameters={'min_weight_fraction_leaf': (0, 0.5)},
categorical_parameters={'criterion': ['gini', 'entropy']},
integer_parameters={'max_depth': (2, 20), 'max_leaf_nodes': (2, 30)},
param_grid=params_grid,
algorithm='eaMuPlusLambda',
n_jobs=-1,
verbose=True)

Expand Down
Binary file removed demo/geneticopt.gif
Binary file not shown.
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
HERE = pathlib.Path(__file__).parent

README = (HERE / "README.md").read_text()

setup(
name="sklearn-genetic-opt",
version="0.2.1",
description="Sklearn models hyperparameters tuning using evolutionary algorithms",
version="0.3.0",
description="Scikit-learn models hyperparameters tuning, using evolutionary algorithms",
long_description=README,
long_description_content_type="text/markdown",
url="https://github.com/rodrigo-arenas/Sklearn-genetic-opt",
Expand Down
2 changes: 1 addition & 1 deletion sklearn_genetic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from sklearn_genetic.genetic_search import GASearchCV
from .genetic_search import GASearchCV

__all__ = ['GASearchCV']

0 comments on commit afe563d

Please sign in to comment.