# flash.py
# FLASH: sequential model-based hyperparameter tuning with a CART surrogate.
# Reference: Nair, Vivek, et al. "Finding Faster Configurations Using FLASH."
# IEEE Transactions on Software Engineering (2018).
import random
import warnings

import numpy as np
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeRegressor
# from sklearn.model_selection import StratifiedShuffleSplit  # needed by tune_dt below

from tuner import SVM_TUNER

BUDGET = 10           # evaluation budget for the FLASH loop
POOL_SIZE = 10000     # size of the randomly generated configuration pool
INIT_POOL_SIZE = 10   # configurations evaluated before the surrogate is trained
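
# FLASH in brief: evaluate a small random sample of configurations, fit a CART
# (decision-tree) surrogate on the resulting scores, then repeatedly evaluate
# the single configuration the surrogate predicts to be best, refitting the
# surrogate after every evaluation, until the budget is spent.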


# Disabled variant for decision trees; requires DT_TUNER from tuner.
# def tune_dt(x_train, y_train, project_name):
#     tuner = DT_TUNER()
#     sss = StratifiedShuffleSplit(n_splits=1, test_size=.2, random_state=0)
#     for train_index, tune_index in sss.split(x_train, y_train):
#         x_train_flash, x_tune_flash = x_train[train_index], x_train[tune_index]
#         y_train_flash, y_tune_flash = y_train.iloc[train_index], y_train.iloc[tune_index]
#     best_conf = tune_with_flash(tuner, x_train_flash, y_train_flash, x_tune_flash, y_tune_flash,
#                                 random_seed=1)
#     return best_conf


def tune_with_flash(tuner, x_train, y_train, x_tune, y_tune, random_seed=0):
    """Run FLASH and return the best configuration found for the tuner's classifier."""
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    random.seed(random_seed)
    this_budget = BUDGET

    # Make the initial population: a large random pool of candidate configs.
    param_search_space = tuner.generate_param_pools(POOL_SIZE)

    # Evaluate a small random sample of the pool to seed the surrogate.
    evaluated_configs = random.sample(param_search_space, INIT_POOL_SIZE)
    # param_search_space = list(set(param_search_space) - set(evaluated_configs))
    f_scores = [measure_fitness(tuner, x_train, y_train, x_tune, y_tune, config)
                for config in evaluated_configs]

    # Hold the best values seen so far.
    best_id = int(np.argmax(f_scores))
    best_f = f_scores[best_id]
    best_config = evaluated_configs[best_id]

    # Convert string-valued parameters to numeric so CART can fit them.
    evaluated_configs = [tuner.transform_to_numeric(x) for x in evaluated_configs]
    param_search_space = [tuner.transform_to_numeric(x) for x in param_search_space]

    n_evals = 0
    while this_budget > 0 and param_search_space:
        # Fit a CART surrogate on every configuration evaluated so far.
        cart_model = DecisionTreeRegressor(random_state=0)
        cart_model.fit(evaluated_configs, f_scores)

        # Evaluate the candidate the surrogate predicts to be best.
        next_config_id = acquisition_fn(param_search_space, cart_model)
        next_config = param_search_space.pop(next_config_id)
        next_config_normal = tuner.reverse_transform_from_numeric(next_config)
        next_f = measure_fitness(tuner, x_train, y_train, x_tune, y_tune, next_config_normal)
        if np.isnan(next_f) or next_f == 0:
            continue  # discard degenerate configs without spending budget
        f_scores.append(next_f)
        evaluated_configs.append(next_config)

        # An improvement earns one extra evaluation of budget.
        if isBetter(next_f, best_f):
            best_config = next_config_normal
            best_f = next_f
            this_budget += 1
        this_budget -= 1
        n_evals += 1
    return best_config


def acquisition_fn(search_space, cart_model):
    """Return the index of the configuration with the highest predicted score."""
    predicted = cart_model.predict(search_space)
    return int(np.argmax(predicted))


def isBetter(new, old):
    """True when the new fitness improves on the old one."""
    return old < new


def measure_fitness(tuner, x_train, y_train, x_tune, y_tune, configs):
    """Train the tuner's classifier with `configs` and score it on the tuning set."""
    clf = tuner.get_clf(configs)
    clf.fit(x_train, np.ravel(y_train))
    y_pred = clf.predict(x_tune)
    return f1_score(y_tune, y_pred, average='micro')
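

# A minimal usage sketch, not part of the original file. It assumes SVM_TUNER
# can be constructed with no arguments and exposes the interface used above
# (generate_param_pools, get_clf, transform_to_numeric,
# reverse_transform_from_numeric); the synthetic dataset is illustrative only.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=500, n_features=10, random_state=0)
    x_train, x_tune, y_train, y_tune = train_test_split(X, y, test_size=0.2, random_state=0)
    best = tune_with_flash(SVM_TUNER(), x_train, y_train, x_tune, y_tune, random_seed=1)
    print("Best configuration found:", best)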