Skip to content

Commit

Permalink
Saving EOA progress
Browse files Browse the repository at this point in the history
  • Loading branch information
mghasemi committed Mar 19, 2019
1 parent 7173dcf commit f06826a
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 19 deletions.
2 changes: 1 addition & 1 deletion SKSurrogate/aml.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ def _eval(ppl):
num_parents = kwargs.pop('num_parents', 30)
mutation_prob = kwargs.pop('mutation_prob', .1)
_eoa = EOA(population=Pop, fitness=_eval, num_parents=num_parents, mutation_prob=mutation_prob,
term_genes=self.couldBlast, init_genes=self.couldBfirst, **kwargs)
term_genes=self.couldBlast, init_genes=self.couldBfirst, check_point=self.check_point, **kwargs)
_eoa()
self.best_estimator_ = list(self.get_top(1).items())[0][1][0]
return self
Expand Down
55 changes: 54 additions & 1 deletion SKSurrogate/eoa.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def __init__(self, population, fitness, **kwargs):
self.genes = kwargs.pop('genes', [])
self.init_genes = kwargs.pop('init_genes', [])
self.term_genes = kwargs.pop('term_genes', [])
self.task_name = kwargs.pop('task_name', 'EOA')
self.check_point = kwargs.pop('check_point', './')
if not self.genes:
self.find_genes()
self.evals = OrderedDict([(_, None) for _ in self.population])
Expand All @@ -62,7 +64,53 @@ def find_genes(self):
if not self.term_genes:
self.term_genes = self.genes

def __save(self):
    """
    Logs state of the evolutionary optimization progress at each iteration.

    The snapshot is pickled to ``self.check_point + self.task_name + '.eoa'``.
    It is first written to a temporary sibling file (``<target>.tmp``) and
    then moved over the previous checkpoint, so a failure or interruption
    while writing never truncates or corrupts the last good checkpoint.

    :return: None
    :raises OSError: if the checkpoint location is not writable
    """
    from os import remove, replace
    from pickle import dump

    # Collect the state *before* touching the file system: a missing attribute
    # must not cost us the checkpoint that is already on disk.
    # The keys (including the 'porp' spellings) mirror the attribute names and
    # are what `__load` expects to find.
    info = dict(population_size=self.population_size, parents_porp=self.parents_porp,
                num_parents=self.num_parents, elits_porp=self.elits_porp, num_elites=self.num_elites,
                mutation_prob=self.mutation_prob, max_generations=self.max_generations,
                generation_num=self.generation_num, genes=self.genes, init_genes=self.init_genes,
                term_genes=self.term_genes, task_name=self.task_name, check_point=self.check_point,
                evals=self.evals, parents=self.parents, children=self.children)
    # `check_point` is used as a plain path prefix (default './'), so the file
    # name is built by concatenation exactly as `__load` does.
    target = self.check_point + self.task_name + '.eoa'
    scratch = target + '.tmp'
    try:
        with open(scratch, 'wb') as fl:
            dump(info, fl)
    except Exception:
        # Do not leave a half-written temporary file behind.
        try:
            remove(scratch)
        except OSError:
            pass
        raise
    # Atomic on the same file system: readers see the old or the new checkpoint.
    replace(scratch, target)

def __load(self):
    """
    Loads previous information saved, if any.

    Reads the checkpoint at ``self.check_point + self.task_name + '.eoa'``
    and restores every attribute stored in it onto this object. When no
    checkpoint file exists the object is left untouched.

    The restore is all-or-nothing: every expected key is looked up before any
    attribute is assigned, so an incomplete checkpoint raises ``KeyError``
    without leaving the object half-restored.

    :return: None
    :raises KeyError: if the checkpoint lacks one of the expected entries
    """
    from pickle import load

    try:
        with open(self.check_point + self.task_name + '.eoa', 'rb') as fl:
            # NOTE(security): unpickling can execute arbitrary code; only point
            # `check_point` at locations whose contents are trusted.
            info = load(fl)
    except FileNotFoundError:
        # No previous run to resume from.
        return
    fields = ('population_size', 'parents_porp', 'num_parents', 'elits_porp', 'num_elites',
              'mutation_prob', 'max_generations', 'generation_num', 'genes', 'init_genes',
              'term_genes', 'task_name', 'check_point', 'evals', 'parents', 'children')
    # Resolve everything first so a missing key aborts before any mutation.
    restored = [(name, info[name]) for name in fields]
    for name, value in restored:
        setattr(self, name, value)

def __call__(self, *args, **kwargs):
self.parents = self.init_pop(self)
self.__load()
tqdm = None
pbar = None
try:
Expand All @@ -77,13 +125,18 @@ def __call__(self, *args, **kwargs):
tqdm = None
if tqdm is not None:
pbar = tqdm(total=self.max_generations)
self.parents = self.init_pop(self)
pbar.update(self.generation_num)
while not self.termination(self):
self.__save()
self.generation_num += 1
self.parents = self.fitness(self.parents)
for _ in self.parents:
self.evals[_] = self.parents[_]
self.recomb(self)
self.mutation(self)
self.children = self.fitness(self.children)
for _ in self.children:
self.evals[_] = self.children[_]
self.elitism(self)
self.parents = self.children
if tqdm is not None:
Expand Down
2 changes: 1 addition & 1 deletion SKSurrogate/mltrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class Metrics(Model):
This table stores the calculated metrics of each stored model.
"""
try:
mwtrics_id = IntegerField(primary_key=True, unique=True, null=False)
metrics_id = IntegerField(primary_key=True, unique=True, null=False)
model_id = ForeignKeyField(MLModel)
accuracy = FloatField(null=True)
auc = FloatField(null=True)
Expand Down
24 changes: 10 additions & 14 deletions SKSurrogate/structsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,10 +663,6 @@ class SurrogateRandomCV(BaseSearchCV):
- A string, giving an expression as a function of n_jobs,
as in '2*n_jobs'
:param iid: boolean, default=True
If True, the data is assumed to be identically distributed across
the folds, and the loss minimized is the total loss per sample,
and not the mean loss across the folds.
:param cv: int, cross-validation generator or an iterable, optional
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
Expand All @@ -690,7 +686,7 @@ class SurrogateRandomCV(BaseSearchCV):
Prints internal information about the progress of each iteration.
"""

def __init__(self, estimator, params, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
def __init__(self, estimator, params, scoring=None, fit_params=None, n_jobs=1, refit=True, cv=None,
verbose=0, pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True,
max_iter=50, min_evals=25, regressor=None, sampling=CompactSample, radius=None, contraction=.95,
search_sphere=False, optimizer='scipy', scipy_solver='SLSQP', task_name='optim_task', warm_start=True,
Expand All @@ -700,7 +696,7 @@ def __init__(self, estimator, params, scoring=None, fit_params=None, n_jobs=1, i
optimithon_ls_bt_method='Armijo', optimithon_br_func='Carrol', optimithon_penalty=1.e6,
optimithon_max_iter=100, optimithon_difftool=NumericDiff.Simple()):
super(SurrogateRandomCV, self).__init__(estimator=estimator, scoring=scoring, fit_params=fit_params,
n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score)
self.params = params
Expand Down Expand Up @@ -821,19 +817,19 @@ def obj(x):
score = 0
n_test = 0
for train, test in cv_dat:
#if True:
try:
_score = _fit_and_score(estimator=cl, X=X, y=y, scorer=self.scorer_,
train=train, test=test, verbose=self.verbose,
parameters=cand_params, fit_params=self.fit_params,
error_score=self.error_score)[0]
if self.iid:
score += _score * len(test)
n_test += len(test)
else:
score += _score
n_test += 1
#else:
#if self.iid:
# score += _score * len(test)
# n_test += len(test)
#else:
# score += _score
# n_test += 1
score += _score
n_test += 1
except ValueError:
pass
except:# LightGBMError:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='SKSurrogate',
version='0.0.4',
version='0.0.5',
author='Mehdi Ghasemi',
author_email='mehdi.ghasemi@gmail.com',
packages=['SKSurrogate'],
Expand All @@ -18,5 +18,5 @@
keywords=["Auto Machine Learning", "AutoML", "Optimization", "Surrogate Optimization", "Numerical",
"Machine Learning", "Regression", "Random Search"],
install_requires=['numpy', 'scipy', 'pandas', 'matplotlib', 'scikit-learn', 'SALib', 'imbalanced-learn',
'eli5', 'tqdm', 'peewee', 'category_encoders']
'eli5', 'skrebate', 'tqdm', 'peewee', 'category_encoders']
)

0 comments on commit f06826a

Please sign in to comment.