Merge pull request #32 from lacava/pipeline_fix
Pipeline fix
lacava committed Sep 12, 2017
2 parents ee6da54 + 13a333f commit 0351c6a
Showing 5 changed files with 40 additions and 28 deletions.
2 changes: 1 addition & 1 deletion few/_version.py
@@ -6,4 +6,4 @@
"""

__version__ = '0.0.45'
__version__ = '0.0.46'
2 changes: 2 additions & 0 deletions few/evaluation.py
@@ -72,6 +72,8 @@ class EvaluationMixin(object):
'^2': lambda n,features,stack_float,stack_bool,labels: stack_float.pop()**2,
'^3': lambda n,features,stack_float,stack_bool,labels: stack_float.pop()**3,
'sqrt': lambda n,features,stack_float,stack_bool,labels: np.sqrt(np.abs(stack_float.pop())),
#'gauss': lambda n,features,stack_float,stack_bool,labels: np.exp(-stack_float.pop()**2),
#'gauss2': lambda n,features,stack_float,stack_bool,labels: np.exp(-(stack_float.pop()**2+stack_float.pop()**2)),
# 'rbf': lambda n,features,stack_float,stack_bool,labels: np.exp(-(np.norm(stack_float.pop()-stack_float.pop())**2)/2)
# bool operations
'!': lambda n,features,stack_float,stack_bool,labels: np.logical_not(stack_bool.pop()),
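
Note: each entry in this operator dictionary is a lambda that pops its operands from a value stack during program evaluation and returns the result. Below is a minimal standalone sketch of that convention with a simplified signature and made-up inputs; it is illustrative only, not FEW's internal API.

    import numpy as np

    # Simplified stack-based operators in the style of EvaluationMixin.
    # Each lambda consumes operands from stack_float and returns the new value.
    eval_dict = {
        'sqrt':  lambda stack_float: np.sqrt(np.abs(stack_float.pop())),
        # the 'gauss' operator added (commented out) in this commit would behave like:
        'gauss': lambda stack_float: np.exp(-stack_float.pop()**2),
    }

    x = np.array([0.0, 1.0, 2.0])            # one feature column
    stack_float = [x]                        # the evaluation stack
    print(eval_dict['gauss'](stack_float))   # element-wise exp(-x**2)
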
42 changes: 26 additions & 16 deletions few/few.py
@@ -60,7 +60,7 @@ def __init__(self, population_size=50, generations=100,
scoring_function=None, disable_update_check=False,
elitism=True, boolean = False,classification=False,clean=False,
track_diversity=False,mdr=False,otype='f',c=True,
weight_parents=True,operators=None, lex_size=False):
weight_parents=True,operators=None, lex_size=False,normalize=True):
# sets up GP.

# Save params to be recalled later by get_params()
@@ -107,24 +107,25 @@ def __init__(self, population_size=50, generations=100,
self.boolean = boolean
self.classification = classification
self.clean = clean
self.ml = Pipeline([('standardScaler',StandardScaler()), ('ml', ml)])
self.ml_type = type(self.ml.named_steps['ml']).__name__
self.ml = ml
#self.pipeline = Pipeline([('standardScaler',StandardScaler()), ('ml', ml)])
self.ml_type = type(self.ml).__name__
self.track_diversity = track_diversity
self.mdr = mdr
self.otype = otype

self.normalize = normalize

# if otype is b, boolean functions must be turned on
if self.otype=='b':
self.boolean = True

# instantiate sklearn estimator according to specified machine learner
if self.ml.named_steps['ml'] is None:
if self.ml is None:
if self.classification:
self.ml = Pipeline([('standardScaler',StandardScaler()),
('ml',LogisticRegression(solver='sag'))])
self.ml = LogisticRegression(solver='sag')
else:
self.ml = Pipeline([('standardScaler',StandardScaler()),
('ml',LassoLarsCV())])
self.ml = LassoLarsCV()

if not self.scoring_function:
if self.classification:
self.scoring_function = accuracy_score
@@ -144,7 +145,7 @@ def __init__(self, population_size=50, generations=100,
#classification
type(DistanceClassifier()): 'silhouette',
})
self.fit_choice = tmp_dict[type(self.ml.named_steps['ml'])]
self.fit_choice = tmp_dict[type(self.ml)]

# Columns to always ignore when in an operator
self.non_feature_columns = ['label', 'group', 'guess']
@@ -188,12 +189,17 @@ def fit(self, features, labels):
print('{}\t=\t{}'.format(arg, self.get_params()[arg]))
print('')

# re-initialize pipeline (needs to be here rather than init for GridSearchCV)
if self.normalize:
self.pipeline = Pipeline([('standardScaler',StandardScaler()), ('ml', self.ml)])
else:
self.pipeline = Pipeline([('ml',self.ml)])
######################################################### initial model
# fit to original data
with warnings.catch_warnings():
warnings.simplefilter("ignore")
self._best_score = np.mean(
[self.ml.fit(features[train],labels[train]).
[self.pipeline.fit(features[train],labels[train]).
score(features[test],labels[test])
for train, test in KFold().split(features,
labels)])
@@ -242,7 +248,7 @@ def fit(self, features, labels):
# order = 'F')

# calculate fitness of individuals
# fitnesses = list(map(lambda I: fitness(I,labels,self.ml),X))
# fitnesses = list(map(lambda I: fitness(I,labels,self.pipeline),X))
self.F = self.calc_fitness(self.X,labels,self.fit_choice,self.sel)

#with Parallel(n_jobs=10) as parallel:
@@ -286,7 +292,7 @@ def fit(self, features, labels):
try:
if self.valid_loc():
tmp_score = np.mean(
[self.ml.fit(
[self.pipeline.fit(
self.X[self.valid_loc(),:].transpose()[train],
labels[train]).
score(self.X[self.valid_loc(),:].transpose()[test],
@@ -312,7 +318,7 @@ def fit(self, features, labels):

#################################################### save best model
if self.valid_loc() and tmp_score > self._best_score:
self._best_estimator = copy.deepcopy(self.ml)
self._best_estimator = copy.deepcopy(self.pipeline)
self._best_score = tmp_score
stall_count = 0;
self._best_inds = copy.deepcopy(self.valid())
@@ -374,7 +380,7 @@ def fit(self, features, labels):
# training data
with warnings.catch_warnings():
warnings.simplefilter("ignore")
self._best_estimator = self.ml.fit(features,labels)
self._best_estimator = self.pipeline.fit(features,labels)
else:
# fit final estimator to all the training data
with warnings.catch_warnings():
@@ -778,6 +784,9 @@ def main():
parser.add_argument('--mdr', action='store_true',dest='MDR',default=False,
help='Use MDR nodes.')

parser.add_argument('--nonorm', action='store_false',dest='NORMALIZE',default=True,
help='Disable standard scaler preprocessor.')

parser.add_argument('--diversity', action='store_true',
dest='TRACK_DIVERSITY', default=False,
help='Store diversity of feature transforms each gen.')
@@ -862,7 +871,8 @@ def main():
classification=args.CLASSIFICATION,clean = args.CLEAN,
track_diversity=args.TRACK_DIVERSITY,mdr=args.MDR,
otype=args.OTYPE,c=args.c, lex_size = args.LEX_SIZE,
weight_parents = args.WEIGHT_PARENTS,operators=args.OPS)
weight_parents = args.WEIGHT_PARENTS,operators=args.OPS,
normalize=args.NORMALIZE)

learner.fit(training_features, training_labels)
# pdb.set_trace()
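
Note: the few.py changes above follow a common scikit-learn pattern: store the raw learner and the new normalize flag untouched in __init__, and only assemble the StandardScaler-plus-estimator Pipeline inside fit(), so that get_params()/clone() (and therefore GridSearchCV) see the constructor arguments unchanged. Below is a minimal sketch of that pattern; the class name and interface are illustrative, not FEW's actual API.

    from sklearn.base import BaseEstimator
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LassoLarsCV

    class TinyEstimator(BaseEstimator):
        def __init__(self, ml=None, normalize=True):
            # the constructor only stores parameters, as sklearn's clone() expects
            self.ml = ml
            self.normalize = normalize

        def fit(self, X, y):
            ml = self.ml if self.ml is not None else LassoLarsCV()
            steps = [('standardScaler', StandardScaler())] if self.normalize else []
            steps.append(('ml', ml))
            self.pipeline = Pipeline(steps)   # rebuilt on every call to fit()
            self.pipeline.fit(X, y)
            return self

        def predict(self, X):
            return self.pipeline.predict(X)

With this layout, something like GridSearchCV(TinyEstimator(), {'normalize': [True, False]}) can clone and refit the estimator safely, which appears to be the motivation for the "re-initialize pipeline" comment in fit().
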
10 changes: 5 additions & 5 deletions few/population.py
@@ -201,13 +201,13 @@ def init_pop(self):
# this is needed because svm has a bug that throws valueerror
#on attribute check
seed_with_raw_features=True
elif (hasattr(self.ml.named_steps['ml'],'coef_') or
hasattr(self.ml.named_steps['ml'],'feature_importances_')):
elif (hasattr(self.pipeline.named_steps['ml'],'coef_') or
hasattr(self.pipeline.named_steps['ml'],'feature_importances_')):
# add model components with non-zero coefficients to initial
# population, in order of coefficient size
coef = (self.ml.named_steps['ml'].coef_ if
hasattr(self.ml.named_steps['ml'],'coef_') else
self.ml.named_steps['ml'].feature_importances_)
coef = (self.pipeline.named_steps['ml'].coef_ if
hasattr(self.pipeline.named_steps['ml'],'coef_') else
self.pipeline.named_steps['ml'].feature_importances_)
# compress multiple coefficients for each feature into single
# numbers (occurs with multiclass classification)
if len(coef.shape)>1:
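
Note: the seeding logic above ranks features by the magnitude of the fitted model's coefficients (or feature importances); for multiclass models, coef_ is a matrix and is first compressed to one number per feature. Below is a small self-contained sketch of that compression step with invented coefficient values; the mean-of-absolute-values reduction is an assumption based on the analogous code in variation.py.

    import numpy as np

    # coef_ from a 2-class model over 3 features (values invented for illustration)
    coef = np.array([[ 0.5, -0.1, 0.0],
                     [-0.3,  0.8, 0.0]])

    # compress multiple coefficients per feature into a single magnitude,
    # as init_pop does before seeding the population
    if len(coef.shape) > 1:
        coef = np.array([np.mean(np.abs(c)) for c in coef.transpose()])

    print(coef)                      # -> [0.4  0.45 0.  ]
    print(np.argsort(coef)[::-1])    # features in order of coefficient size
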
12 changes: 6 additions & 6 deletions few/variation.py
@@ -23,10 +23,10 @@ def variation(self,parents):
self.ml_type != 'SVC' and self.ml_type != 'SVR'):
# this is needed because svm has a bug that throws valueerror on
# attribute check
if hasattr(self.ml.named_steps['ml'],'coef_'):
if hasattr(self.pipeline.named_steps['ml'],'coef_'):
# for l1 regularization, filter individuals with 0 coefficients
if self.weight_parents:
weights = self.ml.named_steps['ml'].coef_
weights = self.pipeline.named_steps['ml'].coef_
if len(weights.shape)>1: # handle multi-coefficient models
weights = [np.mean(abs(c)) for c in weights.transpose()]
# softmax transformation of the weights
@@ -36,20 +36,20 @@ def variation(self,parents):
self.population_size, p=weights)))
else:
offspring = copy.deepcopy(list(
x for i,x in zip(self.ml.named_steps['ml'].coef_,
x for i,x in zip(self.pipeline.named_steps['ml'].coef_,
self.valid(parents)) if (i != 0).any()))
elif hasattr(self.ml.named_steps['ml'],'feature_importances_'):
elif hasattr(self.pipeline.named_steps['ml'],'feature_importances_'):
# for tree methods, filter out individuals with 0 feature importance
if self.weight_parents:
weights = self.ml.named_steps['ml'].feature_importances_
weights = self.pipeline.named_steps['ml'].feature_importances_
# softmax transformation of the weights
weights = np.exp(weights)/np.sum(np.exp(weights))
offspring = copy.deepcopy(list(
np.random.choice(self.valid(parents),
self.population_size, p=weights)))
else:
offspring = copy.deepcopy(list(
x for i,x in zip(self.ml.named_steps['ml'].feature_importances_,
x for i,x in zip(self.pipeline.named_steps['ml'].feature_importances_,
self.valid(parents)) if i != 0))
else:
offspring = copy.deepcopy(self.valid(parents))
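
Note: when weight_parents is enabled, the coefficients (or feature importances) attributed to the surviving individuals are passed through a softmax so that parents are sampled in proportion to the weight the ML model assigned them. Below is a minimal sketch of that sampling step with invented weights and individual labels.

    import numpy as np

    # |coefficient| attributed to each valid individual (values invented)
    weights = np.array([0.9, 0.1, 0.0, 0.4])
    # softmax transformation of the weights, as in variation()
    weights = np.exp(weights) / np.sum(np.exp(weights))

    parents = ['ind0', 'ind1', 'ind2', 'ind3']
    population_size = 6
    offspring = list(np.random.choice(parents, population_size, p=weights))
    print(offspring)   # individuals with larger coefficients appear more often
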
