Saving EOA progress

mghasemi · Mar 19, 2019 · f06826a · f06826a
1 parent 7173dcf
commit f06826a
Show file tree

Hide file tree

Showing 5 changed files with 68 additions and 19 deletions.
diff --git a/SKSurrogate/aml.py b/SKSurrogate/aml.py
@@ -607,7 +607,7 @@ def _eval(ppl):
         num_parents = kwargs.pop('num_parents', 30)
         mutation_prob = kwargs.pop('mutation_prob', .1)
         _eoa = EOA(population=Pop, fitness=_eval, num_parents=num_parents, mutation_prob=mutation_prob,
-                   term_genes=self.couldBlast, init_genes=self.couldBfirst, **kwargs)
+                   term_genes=self.couldBlast, init_genes=self.couldBfirst, check_point=self.check_point, **kwargs)
         _eoa()
         self.best_estimator_ = list(self.get_top(1).items())[0][1][0]
         return self

diff --git a/SKSurrogate/eoa.py b/SKSurrogate/eoa.py
@@ -46,6 +46,8 @@ def __init__(self, population, fitness, **kwargs):
         self.genes = kwargs.pop('genes', [])
         self.init_genes = kwargs.pop('init_genes', [])
         self.term_genes = kwargs.pop('term_genes', [])
+        self.task_name = kwargs.pop('task_name', 'EOA')
+        self.check_point = kwargs.pop('check_point', './')
         if not self.genes:
             self.find_genes()
         self.evals = OrderedDict([(_, None) for _ in self.population])
@@ -62,7 +64,53 @@ def find_genes(self):
         if not self.term_genes:
             self.term_genes = self.genes
 
+    def __save(self):
+        """
+        Logs state of the evolutionary optimization progress to `check_point + task_name + '.eoa'` as a pickled dict
+        :return: None
+        """
+        from pickle import dump
+        info = dict(population_size=self.population_size, parents_porp=self.parents_porp, num_parents=self.num_parents,
+                    elits_porp=self.elits_porp, num_elites=self.num_elites, mutation_prob=self.mutation_prob,
+                    max_generations=self.max_generations, generation_num=self.generation_num, genes=self.genes,
+                    init_genes=self.init_genes, term_genes=self.term_genes, task_name=self.task_name,
+                    check_point=self.check_point, evals=self.evals, parents=self.parents, children=self.children)
+        # `info` is built before opening, so a missing attribute cannot truncate the previous checkpoint file
+        with open(self.check_point + self.task_name + '.eoa', 'wb') as fl:
+            dump(info, fl)
+
+    def __load(self):
+        """
+        Loads previous information saved, if any; when the checkpoint file does not exist nothing is changed
+        :return: None
+        """
+        from pickle import load
+        try:
+            # NOTE: unpickling can execute arbitrary code; `check_point` must only refer to trusted files
+            with open(self.check_point + self.task_name + '.eoa', 'rb') as fl:
+                info = load(fl)
+        except FileNotFoundError:
+            return
+        self.population_size = info['population_size']
+        self.parents_porp = info['parents_porp']
+        self.num_parents = info['num_parents']
+        self.elits_porp = info['elits_porp']
+        self.num_elites = info['num_elites']
+        self.mutation_prob = info['mutation_prob']
+        self.max_generations = info['max_generations']
+        self.generation_num = info['generation_num']
+        self.genes = info['genes']
+        self.init_genes = info['init_genes']
+        self.term_genes = info['term_genes']
+        self.task_name = info['task_name']
+        self.check_point = info['check_point']
+        self.evals = info['evals']
+        self.parents = info['parents']
+        self.children = info['children']
+
     def __call__(self, *args, **kwargs):
+        self.parents = self.init_pop(self)
+        self.__load()
         tqdm = None
         pbar = None
         try:
@@ -77,13 +125,18 @@ def __call__(self, *args, **kwargs):
             tqdm = None
         if tqdm is not None:
             pbar = tqdm(total=self.max_generations)
-        self.parents = self.init_pop(self)
+        pbar.update(self.generation_num)
         while not self.termination(self):
+            self.__save()
             self.generation_num += 1
             self.parents = self.fitness(self.parents)
+            for _ in self.parents:
+                self.evals[_] = self.parents[_]
             self.recomb(self)
             self.mutation(self)
             self.children = self.fitness(self.children)
+            for _ in self.children:
+                self.evals[_] = self.children[_]
             self.elitism(self)
             self.parents = self.children
             if tqdm is not None:

diff --git a/SKSurrogate/mltrace.py b/SKSurrogate/mltrace.py
@@ -96,7 +96,7 @@ class Metrics(Model):
     This table stores the calculated metrics of each stored model.
     """
     try:
-        mwtrics_id = IntegerField(primary_key=True, unique=True, null=False)
+        metrics_id = IntegerField(primary_key=True, unique=True, null=False)
         model_id = ForeignKeyField(MLModel)
         accuracy = FloatField(null=True)
         auc = FloatField(null=True)

diff --git a/SKSurrogate/structsearch.py b/SKSurrogate/structsearch.py
@@ -663,10 +663,6 @@ class SurrogateRandomCV(BaseSearchCV):
 
             - A string, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
-    :param iid: boolean, default=True
-        If True, the data is assumed to be identically distributed across
-        the folds, and the loss minimized is the total loss per sample,
-        and not the mean loss across the folds.
     :param cv: int, cross-validation generator or an iterable, optional
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -690,7 +686,7 @@ class SurrogateRandomCV(BaseSearchCV):
         Prints internal information about the progress of each iteration.
     """
 
-    def __init__(self, estimator, params, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
+    def __init__(self, estimator, params, scoring=None, fit_params=None, n_jobs=1, refit=True, cv=None,
                  verbose=0, pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True,
                  max_iter=50, min_evals=25, regressor=None, sampling=CompactSample, radius=None, contraction=.95,
                  search_sphere=False, optimizer='scipy', scipy_solver='SLSQP', task_name='optim_task', warm_start=True,
@@ -700,7 +696,7 @@ def __init__(self, estimator, params, scoring=None, fit_params=None, n_jobs=1, i
                  optimithon_ls_bt_method='Armijo', optimithon_br_func='Carrol', optimithon_penalty=1.e6,
                  optimithon_max_iter=100, optimithon_difftool=NumericDiff.Simple()):
         super(SurrogateRandomCV, self).__init__(estimator=estimator, scoring=scoring, fit_params=fit_params,
-                                                n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
+                                                n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
                                                 pre_dispatch=pre_dispatch, error_score=error_score,
                                                 return_train_score=return_train_score)
         self.params = params
@@ -821,19 +817,19 @@ def obj(x):
             score = 0
             n_test = 0
             for train, test in cv_dat:
-                #if True:
                 try:
                     _score = _fit_and_score(estimator=cl, X=X, y=y, scorer=self.scorer_,
                                             train=train, test=test, verbose=self.verbose,
                                             parameters=cand_params, fit_params=self.fit_params,
                                             error_score=self.error_score)[0]
-                    if self.iid:
-                        score += _score * len(test)
-                        n_test += len(test)
-                    else:
-                        score += _score
-                        n_test += 1
-                #else:
+                    #if self.iid:
+                    #    score += _score * len(test)
+                    #    n_test += len(test)
+                    #else:
+                    #    score += _score
+                    #    n_test += 1
+                    score += _score
+                    n_test += 1
                 except ValueError:
                     pass
                 except:# LightGBMError:

diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 setup(
     name='SKSurrogate',
-    version='0.0.4',
+    version='0.0.5',
     author='Mehdi Ghasemi',
     author_email='mehdi.ghasemi@gmail.com',
     packages=['SKSurrogate'],
@@ -18,5 +18,5 @@
     keywords=["Auto Machine Learning", "AutoML", "Optimization", "Surrogate Optimization", "Numerical",
               "Machine Learning", "Regression", "Random Search"],
     install_requires=['numpy', 'scipy', 'pandas', 'matplotlib', 'scikit-learn', 'SALib', 'imbalanced-learn',
-                      'eli5', 'tqdm', 'peewee', 'category_encoders']
+                      'eli5', 'skrebate', 'tqdm', 'peewee', 'category_encoders']
 )