From f4503bb3a3be014b452f54d8e2d187bb6419f627 Mon Sep 17 00:00:00 2001 From: Jin Haifeng Date: Wed, 1 Aug 2018 13:15:37 -0500 Subject: [PATCH] Devel (#27) * default * update * bn * middle * middle * graph test passed * pytorch * req * ci * ci * ci --- .travis.yml | 2 + autokeras/bayesian.py | 2 +- autokeras/classifier.py | 76 ++++---- autokeras/constant.py | 3 + autokeras/generator.py | 93 ++-------- autokeras/graph.py | 184 +++++++++--------- autokeras/layer_transformer.py | 87 ++++----- autokeras/layers.py | 249 +++++++++++++------------ autokeras/net_transformer.py | 14 +- autokeras/preprocessor.py | 49 +++++ autokeras/search.py | 15 +- autokeras/stub.py | 50 ----- autokeras/utils.py | 151 +++++++-------- experiments/bn.py | 25 +++ experiments/default.py | 30 +++ experiments/pytorch_cifar10.py | 141 ++++++++++++++ experiments/tc.py | 68 +++++++ requirements.txt | 23 +-- tests/common.py | 318 +++++++++++++++++++------------- tests/test_bayesian.py | 23 ++- tests/test_classifier.py | 24 --- tests/test_generator.py | 14 +- tests/test_graph.py | 158 ++++++++-------- tests/test_layer_transformer.py | 41 ++-- tests/test_layers.py | 17 +- tests/test_net_transformer.py | 17 +- tests/test_search.py | 22 +-- tests/test_stub.py | 14 -- tests/test_utils.py | 19 +- 29 files changed, 1058 insertions(+), 871 deletions(-) delete mode 100644 autokeras/stub.py create mode 100644 experiments/bn.py create mode 100644 experiments/default.py create mode 100644 experiments/pytorch_cifar10.py create mode 100644 experiments/tc.py delete mode 100644 tests/test_stub.py diff --git a/.travis.yml b/.travis.yml index e0ebaa082..9075656ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,8 @@ python: - "3.6" before_install: - sudo apt-get install graphviz +install: + - pip install -r requirements.txt --quiet script: - pytest tests --cov=autokeras after_success: diff --git a/autokeras/bayesian.py b/autokeras/bayesian.py index 0b493989c..ae8270166 100644 --- a/autokeras/bayesian.py +++ b/autokeras/bayesian.py @@ -21,7 +21,7 @@ def layers_distance(list_a, list_b): for i in range(len_a): for j in range(len_b): f[i][j] = min(f[i][j - 1] + 1, f[i - 1][j] + 1, f[i - 1][j - 1] + layer_distance(list_a[i], list_b[j])) - return f[len_a][len_b] + return f[len_a - 1][len_b - 1] def skip_connection_distance(a, b): diff --git a/autokeras/classifier.py b/autokeras/classifier.py index 73c0c4c87..3c61e1a6a 100644 --- a/autokeras/classifier.py +++ b/autokeras/classifier.py @@ -2,17 +2,19 @@ import pickle import csv import time -import tensorflow as tf +from functools import reduce + +import torch import scipy.ndimage as ndimage import numpy as np -from keras import backend from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split +from torch.utils.data import DataLoader from autokeras import constant -from autokeras.preprocessor import OneHotEncoder +from autokeras.preprocessor import OneHotEncoder, DataTransformer from autokeras.search import BayesianSearcher, train from autokeras.utils import ensure_dir, has_file, pickle_from_file, pickle_to_file @@ -31,16 +33,12 @@ def _validate(x_train, y_train): raise ValueError('x_train and y_train should have the same number of instances.') -def run_searcher_once(x_train, y_train, x_test, y_test, path): +def run_searcher_once(train_data, test_data, path): if constant.LIMIT_MEMORY: - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - sess = tf.Session(config=config) - init = tf.global_variables_initializer() - sess.run(init) - 
backend.set_session(sess) + # TODO: limit pytorch memory. + pass searcher = pickle_from_file(os.path.join(path, 'searcher')) - searcher.search(x_train, y_train, x_test, y_test) + searcher.search(train_data, test_data) def read_csv_file(csv_file_path): @@ -150,6 +148,7 @@ def __init__(self, verbose=False, path=constant.DEFAULT_SAVE_PATH, resume=False, self.path = path else: self.y_encoder = None + self.data_transformer = None self.verbose = verbose self.searcher = False self.path = path @@ -186,6 +185,10 @@ def fit(self, x_train=None, y_train=None, time_limit=None): y_train = self.y_encoder.transform(y_train) + # Transform x_train + if self.data_transformer is None: + self.data_transformer = DataTransformer(x_train) + # Create the searcher and save on disk if not self.searcher: input_shape = x_train.shape[1:] @@ -199,8 +202,12 @@ def fit(self, x_train=None, y_train=None, time_limit=None): self.searcher = True # Divide training data into training and testing data. - x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.25, random_state=42) + x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, + test_size=constant.VALIDATION_SET_RATIO, + random_state=42) + train_data = self.data_transformer.transform_train(x_train, y_train) + test_data = self.data_transformer.transform_test(x_test, y_test) pickle.dump(self, open(os.path.join(self.path, 'classifier'), 'wb')) pickle_to_file(self, os.path.join(self.path, 'classifier')) @@ -209,7 +216,7 @@ def fit(self, x_train=None, y_train=None, time_limit=None): start_time = time.time() while time.time() - start_time <= time_limit: - run_searcher_once(x_train, y_train, x_test, y_test, self.path) + run_searcher_once(train_data, test_data, self.path) if len(self.load_searcher().history) >= constant.MAX_MODEL_NUM: break @@ -223,20 +230,19 @@ def predict(self, x_test): An numpy.ndarray containing the results. """ if constant.LIMIT_MEMORY: - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - sess = tf.Session(config=config) - init = tf.global_variables_initializer() - sess.run(init) - backend.set_session(sess) - x_test = x_test.astype('float32') / 255 + # TODO: limit pytorch memory. 
+ pass + test_data = self.data_transformer.transform_test(x_test) + test_loader = DataLoader(test_data, batch_size=constant.MAX_BATCH_SIZE, shuffle=True) model = self.load_searcher().load_best_model().produce_model() - return self.y_encoder.inverse_transform(model.predict(x_test, )) + model.eval() - def summary(self): - """Print the summary of the best model.""" - model = self.load_searcher().load_best_model() - model.summary() + outputs = [] + with torch.no_grad(): + for index, inputs in enumerate(test_loader): + outputs.append(model(inputs).numpy()) + output = reduce(lambda x, y: np.concatenate((x, y)), outputs) + return self.y_encoder.inverse_transform(output) def evaluate(self, x_test, y_test): """Return the accuracy score between predict value and test_y.""" @@ -262,25 +268,19 @@ def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain """ if trainer_args is None: trainer_args = {} + y_train = self.y_encoder.transform(y_train) y_test = self.y_encoder.transform(y_test) + + train_data = self.data_transformer.transform_train(x_train, y_train) + test_data = self.data_transformer.transform_test(x_test, y_test) + searcher = self.load_searcher() graph = searcher.load_best_model() + if retrain: graph.weighted = False - _, _1, graph = train((graph, x_train, y_train, x_test, y_test, trainer_args, None)) - - def export_keras_model(self, path, model_id=None): - """Export the searched model as a Keras saved model. - - Args: - path: A string. The path to the file to save. - model_id: A integer. If not provided, the function will export the best model. - """ - if model_id is None: - model_id = self.get_best_model_id() - graph = self.load_searcher().load_model_by_id(model_id) - graph.produce_model().save(path) + _, _1, graph = train((graph, train_data, test_data, trainer_args, None)) def get_best_model_id(self): """ diff --git a/autokeras/constant.py b/autokeras/constant.py index 69d8040d3..8efb6ec34 100644 --- a/autokeras/constant.py +++ b/autokeras/constant.py @@ -1,5 +1,8 @@ DEFAULT_SAVE_PATH = '/tmp/autokeras/' +# Data + +VALIDATION_SET_RATIO = 0.08333 # Searcher diff --git a/autokeras/generator.py b/autokeras/generator.py index 227f4303a..fcc5e8503 100644 --- a/autokeras/generator.py +++ b/autokeras/generator.py @@ -1,16 +1,7 @@ -from random import randint, random - -from keras import Input, Model -from keras.layers import MaxPooling1D, MaxPooling2D, MaxPooling3D, Dropout, Flatten, Dense, BatchNormalization, \ - Activation, GlobalAveragePooling2D -from keras.losses import categorical_crossentropy -from keras.optimizers import Adadelta, Adam - from autokeras import constant from autokeras.graph import Graph -from autokeras.layers import get_conv_layer_func, get_ave_layer_func, StubBatchNormalization, StubActivation, StubConv, \ - StubDropout, StubPooling, StubGlobalPooling, StubDense, StubInput -from autokeras.stub import StubModel +from autokeras.layers import StubBatchNormalization, StubConv, StubDropout, StubPooling, StubDense, StubFlatten, \ + StubReLU, StubSoftmax class ClassifierGenerator: @@ -22,10 +13,6 @@ def __init__(self, n_classes, input_shape): if len(self.input_shape) < 2: raise ValueError('The input dimension is too low.') - def _get_pool_layer_func(self): - pool_funcs = [MaxPooling1D, MaxPooling2D, MaxPooling3D] - return pool_funcs[len(self.input_shape) - 2] - def _get_shape(self, dim_size): temp_list = [(dim_size,), (dim_size, dim_size), (dim_size, dim_size, dim_size)] return temp_list[len(self.input_shape) - 2] @@ -36,69 +23,21 @@ def __init__(self, 
n_classes, input_shape): super().__init__(n_classes, input_shape) def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDTH): - pool = self._get_pool_layer_func() - conv = get_conv_layer_func(len(self._get_shape(3))) - ave = get_ave_layer_func(len(self._get_shape(3))) - pooling_len = int(model_len / 4) - model = StubModel() - model.input_shape = self.input_shape - model.inputs = [0] - model.layers.append(StubInput()) + graph = Graph(self.input_shape, False) + temp_input_channel = self.input_shape[-1] + output_node_id = 0 for i in range(model_len): - model.layers += [StubActivation('relu'), - StubConv(model_width, kernel_size=3, func=conv), - StubBatchNormalization(), - StubDropout(constant.CONV_DROPOUT_RATE)] + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(temp_input_channel, model_width, kernel_size=3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(model_width), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + temp_input_channel = model_width if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1): - model.layers.append(StubPooling(func=pool)) - - model.layers.append(StubGlobalPooling(ave)) - model.layers.append(StubDense(self.n_classes, activation='softmax')) - model.outputs = [len(model.layers)] - for index, layer in enumerate(model.layers): - layer.input = index - layer.output = index + 1 - return Graph(model, False) - - -class RandomConvClassifierGenerator(ClassifierGenerator): - def __init__(self, n_classes, input_shape): - super().__init__(n_classes, input_shape) - - def generate(self): - """Return the random generated CNN model.""" - conv_num = randint(1, 10) - dense_num = randint(1, 10) - dropout_rate = random() - filter_size = randint(1, 2) * 2 + 1 - pool_size = randint(2, 3) - filter_shape = self._get_shape(filter_size) - pool_shape = self._get_shape(pool_size) - pool = self._get_pool_layer_func() - conv = get_conv_layer_func(len(filter_shape)) + output_node_id = graph.add_layer(StubPooling(), output_node_id) - input_tensor = Input(shape=self.input_shape) - output_tensor = input_tensor - for i in range(conv_num): - kernel_num = randint(10, 30) - output_tensor = conv(kernel_num, filter_shape, - padding='same')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Activation('relu')(output_tensor) - if random() > 0.5: - output_tensor = pool(pool_size=pool_shape, padding='same')(output_tensor) - if random() > 0.5: - output_tensor = Dropout(dropout_rate)(output_tensor) - output_tensor = Flatten()(output_tensor) - for i in range(dense_num): - node_num = randint(128, 1024) - output_tensor = Dense(node_num, activation='relu')(output_tensor) - if random() > 0.5: - output_tensor = Dropout(dropout_rate)(output_tensor) - output_tensor = Dense(self.n_classes, activation='softmax')(output_tensor) - model = Model(input_tensor, output_tensor) - model.compile(loss='categorical_crossentropy', - optimizer=Adam(), - metrics=['accuracy']) - return model + output_node_id = graph.add_layer(StubFlatten(), output_node_id) + output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], self.n_classes), + output_node_id) + graph.add_layer(StubSoftmax(), output_node_id) + return graph diff --git a/autokeras/graph.py b/autokeras/graph.py index 208f6c351..f069eb33e 100644 --- a/autokeras/graph.py +++ b/autokeras/graph.py @@ -1,18 +1,15 @@ from copy import deepcopy 
+from itertools import chain from queue import Queue import numpy as np - -from keras import Input -from keras.engine import Model -from keras.layers import Concatenate, Dropout, Activation, Add +import torch from autokeras import constant from autokeras.layer_transformer import wider_bn, wider_next_conv, wider_next_dense, wider_pre_dense, wider_pre_conv, \ deeper_conv_block, dense_to_deeper_block, add_noise -from autokeras.layers import StubConcatenate, StubAdd, StubConv, is_layer, layer_width, \ - to_real_layer -from autokeras.stub import to_stub_model +from autokeras.layers import StubConcatenate, StubAdd, StubConv, is_layer, layer_width, to_real_layer, \ + set_torch_weight_to_stub, set_stub_weight_to_torch class NetworkDescriptor: @@ -51,6 +48,11 @@ def to_json(self): return {'node_list': self.conv_widths, 'skip_list': skip_list} +class Node: + def __init__(self, shape): + self.shape = shape + + class Graph: """A class represent the neural architecture graph of a Keras model. @@ -79,13 +81,8 @@ class Graph: during the network morphism. """ - def __init__(self, model, weighted=True): - model = to_stub_model(model, weighted) - layers = model.layers[1:] + def __init__(self, input_shape, weighted=True): self.weighted = weighted - self.input_shape = model.input_shape - if self.input_shape[0] is None: - self.input_shape = self.input_shape[1:] self.node_list = [] self.layer_list = [] # node id start with 0 @@ -98,29 +95,21 @@ def __init__(self, model, weighted=True): self.operation_history = [] self.vis = None + self._add_node(Node(input_shape)) - # Add all nodes - for layer in layers: - if isinstance(layer.input, list): - for temp_input in layer.input: - if temp_input not in self.node_list: - self._add_node(temp_input) - else: - if layer.input not in self.node_list: - self._add_node(layer.input) - self._add_node(layer.output) - - # Add all edges - for layer in layers: - if isinstance(layer.input, list): - for temp_input in layer.input: - self._add_edge(layer, - self.node_to_id[temp_input], - self.node_to_id[layer.output]) - else: - self._add_edge(layer, - self.node_to_id[layer.input], - self.node_to_id[layer.output]) + def add_layer(self, layer, input_node_id): + if isinstance(input_node_id, list): + layer.input = list(map(lambda x: self.node_list[x], input_node_id)) + output_node_id = self._add_node(Node(layer.output_shape)) + for node_id in input_node_id: + self._add_edge(layer, node_id, output_node_id) + + else: + layer.input = self.node_list[input_node_id] + output_node_id = self._add_node(Node(layer.output_shape)) + self._add_edge(layer, input_node_id, output_node_id) + + return output_node_id def clear_operation_history(self): self.operation_history = [] @@ -142,6 +131,7 @@ def _add_node(self, node): self.node_list.append(node) self.adj_list[node_id] = [] self.reverse_adj_list[node_id] = [] + return node_id def _add_new_node(self): node_value = len(self.node_list) @@ -193,11 +183,14 @@ def _redirect_edge(self, u_id, v_id, new_v_id): def _replace_layer(self, layer_id, new_layer): """Replace the layer with a new layer.""" old_layer = self.layer_list[layer_id] + new_layer.input = old_layer.input + new_layer.output = old_layer.output self.layer_list[layer_id] = new_layer self.layer_to_id[new_layer] = layer_id self.layer_to_id.pop(old_layer) - def _topological_order(self): + @property + def topological_order(self): """Return the topological order of the node ids.""" q = Queue() in_degree = {} @@ -308,7 +301,7 @@ def _upper_layer_width(self, u): return self._upper_layer_width(a) + 
self._upper_layer_width(b) else: return self._upper_layer_width(v) - return self.input_shape[-1] + return self.node_list[0][-1] def to_conv_deeper_model(self, target_id, kernel_size): """Insert a relu-conv-bn block after the target block. @@ -405,7 +398,7 @@ def to_add_skip_model(self, start_id, end_id): skip_output_id = new_node_id # Add the conv layer - layer2 = StubConv(self.layer_list[end_id].filters, 1, self.layer_list[end_id].func) + layer2 = StubConv(self.layer_list[start_id].filters, self.layer_list[end_id].filters, 1) new_node_id = self._add_new_node() self._add_edge(layer2, skip_output_id, new_node_id) skip_output_id = new_node_id @@ -415,7 +408,7 @@ def to_add_skip_model(self, start_id, end_id): filters_end = self.layer_list[end_id].filters filters_start = self.layer_list[start_id].filters filter_shape = (1,) * (len(self.layer_list[end_id].get_weights()[0].shape) - 2) - weights = np.zeros(filter_shape + (filters_start, filters_end)) + weights = np.zeros((filters_end, filters_start) + filter_shape) bias = np.zeros(filters_end) layer2.set_weights((add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) @@ -454,18 +447,19 @@ def to_concat_skip_model(self, start_id, end_id): new_node_id = self._add_new_node() layer = StubConcatenate() new_node_id2 = self._add_new_node() - layer2 = StubConv(self.layer_list[end_id].filters, 1, self.layer_list[end_id].func) + layer2 = StubConv(self.layer_list[start_id].filters + self.layer_list[end_id].filters, + self.layer_list[end_id].filters, 1) if self.weighted: filters_end = self.layer_list[end_id].filters filters_start = self.layer_list[start_id].filters filter_shape = (1,) * (len(self.layer_list[end_id].get_weights()[0].shape) - 2) - weights = np.zeros(filter_shape + (filters_end, filters_end)) + weights = np.zeros((filters_end, filters_end) + filter_shape) for i in range(filters_end): - filter_weight = np.zeros(filter_shape + (filters_end,)) - filter_weight[(0, 0, i)] = 1 - weights[..., i] = filter_weight + filter_weight = np.zeros((filters_end,) + filter_shape) + filter_weight[(i, 0, 0)] = 1 + weights[i, ...] 
= filter_weight weights = np.concatenate((weights, - np.zeros(filter_shape + (filters_start, filters_end))), axis=2) + np.zeros((filters_end, filters_start) + filter_shape)), axis=1) bias = np.zeros(filters_end) layer2.set_weights((add_noise(weights, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) @@ -482,7 +476,7 @@ def to_concat_skip_model(self, start_id, end_id): def extract_descriptor(self): ret = NetworkDescriptor() - topological_node_list = self._topological_order() + topological_node_list = self.topological_order for u in topological_node_list: for v, layer_id in self.adj_list[u]: layer = self.layer_list[layer_id] @@ -518,46 +512,11 @@ def extract_descriptor(self): def produce_model(self): """Build a new Keras model based on the current graph.""" - input_tensor = Input(shape=self.input_shape) - topo_node_list = self._topological_order() - output_id = topo_node_list[-1] - input_id = topo_node_list[0] - - new_to_old_layer = {} - - node_list = deepcopy(self.node_list) - node_list[input_id] = input_tensor - - node_to_id = deepcopy(self.node_to_id) - node_to_id[input_tensor] = input_id - - for v in topo_node_list: - for u, layer_id in self.reverse_adj_list[v]: - layer = self.layer_list[layer_id] - - if isinstance(layer, (StubAdd, StubConcatenate)): - edge_input_tensor = list(map(lambda x: node_list[x], - self.layer_id_to_input_node_ids[layer_id])) - else: - edge_input_tensor = node_list[u] - - new_layer = to_real_layer(layer) - new_to_old_layer[new_layer] = layer - - temp_tensor = new_layer(edge_input_tensor) - node_list[v] = temp_tensor - node_to_id[temp_tensor] = v - model = Model(input_tensor, node_list[output_id]) - for layer in model.layers[1:]: - if not isinstance(layer, (Activation, Dropout, Concatenate, Add)): - old_layer = new_to_old_layer[layer] - if self.weighted: - layer.set_weights(old_layer.get_weights()) - return model + return TorchModel(self) def _layer_ids_in_order(self, layer_ids): node_id_to_order_index = {} - for index, node_id in enumerate(self._topological_order()): + for index, node_id in enumerate(self.topological_order): node_id_to_order_index[node_id] = index return sorted(layer_ids, key=lambda layer_id: @@ -568,7 +527,7 @@ def _layer_ids_by_type(self, type_str): def _conv_layer_ids_in_order(self): return self._layer_ids_in_order( - list(filter(lambda layer_id: self.layer_list[layer_id].kernel_size not in [1, (1,), (1, 1), (1, 1, 1)], + list(filter(lambda layer_id: self.layer_list[layer_id].kernel_size != 1, self._layer_ids_by_type('Conv')))) def _dense_layer_ids_in_order(self): @@ -582,3 +541,60 @@ def wide_layer_ids(self): def skip_connection_layer_ids(self): return self._conv_layer_ids_in_order()[:-1] + + +class TorchModel(torch.nn.Module): + def __init__(self, graph): + super(TorchModel, self).__init__() + self.graph = graph + self.layers = [] + for layer in graph.layer_list: + self.layers.append(to_real_layer(layer)) + if graph.weighted: + for index, layer in enumerate(self.layers): + set_stub_weight_to_torch(self.graph.layer_list[index], layer) + + def forward(self, input_tensor): + """Build a new Keras model based on the current graph.""" + topo_node_list = self.graph.topological_order + output_id = topo_node_list[-1] + input_id = topo_node_list[0] + + node_list = deepcopy(self.graph.node_list) + node_list[input_id] = input_tensor + + for v in topo_node_list: + for u, layer_id in self.graph.reverse_adj_list[v]: + layer = self.graph.layer_list[layer_id] + torch_layer = self.layers[layer_id] + + if isinstance(layer, (StubAdd, StubConcatenate)): + 
edge_input_tensor = list(map(lambda x: node_list[x], + self.graph.layer_id_to_input_node_ids[layer_id])) + else: + edge_input_tensor = node_list[u] + + temp_tensor = torch_layer(edge_input_tensor) + node_list[v] = temp_tensor + return node_list[output_id] + + def set_weight_to_graph(self): + self.graph.weighted = True + for index, layer in enumerate(self.layers): + set_torch_weight_to_stub(layer, self.graph.layer_list[index]) + + def eval(self): + super().eval() + for layer in self.layers: + layer.eval() + + def train(self, mode=True): + super().train() + for layer in self.layers: + layer.train() + + def parameters(self): + parameters = [] + for layer in self.layers: + parameters += list(layer.parameters()) + return parameters diff --git a/autokeras/layer_transformer.py b/autokeras/layer_transformer.py index c09d854ca..d90ef7f12 100644 --- a/autokeras/layer_transformer.py +++ b/autokeras/layer_transformer.py @@ -1,8 +1,7 @@ import numpy as np from autokeras import constant -from autokeras.layers import StubConv, StubBatchNormalization, StubActivation, StubDropout, StubDense, \ - StubAdd +from autokeras.layers import StubConv, StubBatchNormalization, StubDropout, StubDense, StubReLU NOISE_RATIO = 1e-4 @@ -18,18 +17,18 @@ def deeper_conv_block(conv_layer, kernel_size, weighted=True): filter_weight[index] = 1 weight[..., i] = filter_weight bias = np.zeros(n_filters) - new_conv_layer = StubConv(n_filters, kernel_size=filter_shape, func=conv_layer.func) - bn = StubBatchNormalization() + new_conv_layer = StubConv(conv_layer.filters, n_filters, kernel_size=kernel_size) + bn = StubBatchNormalization(n_filters) if weighted: new_conv_layer.set_weights((add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) - new_weights = [np.ones(n_filters, dtype=np.float32), - np.zeros(n_filters, dtype=np.float32), - np.zeros(n_filters, dtype=np.float32), - np.ones(n_filters, dtype=np.float32)] + new_weights = [add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1])), + add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])), + add_noise(np.zeros(n_filters, dtype=np.float32), np.array([0, 1])), + add_noise(np.ones(n_filters, dtype=np.float32), np.array([0, 1]))] bn.set_weights(new_weights) - return [StubActivation('relu'), + return [StubReLU(), new_conv_layer, bn, StubDropout(constant.CONV_DROPOUT_RATE)] @@ -39,15 +38,15 @@ def dense_to_deeper_block(dense_layer, weighted=True): units = dense_layer.units weight = np.eye(units) bias = np.zeros(units) - new_dense_layer = StubDense(units, dense_layer.activation) + new_dense_layer = StubDense(units, units) if weighted: new_dense_layer.set_weights((add_noise(weight, np.array([0, 1])), add_noise(bias, np.array([0, 1])))) - return [new_dense_layer, StubDropout(constant.DENSE_DROPOUT_RATE)] + return [StubReLU(), new_dense_layer, StubDropout(constant.DENSE_DROPOUT_RATE)] def wider_pre_dense(layer, n_add, weighted=True): if not weighted: - return StubDense(layer.units + n_add, layer.activation) + return StubDense(layer.input_units, layer.units + n_add) n_units2 = layer.units @@ -59,12 +58,12 @@ def wider_pre_dense(layer, n_add, weighted=True): # target layer update (i) for i in range(n_add): teacher_index = rand[i] - new_weight = teacher_w[:, teacher_index] - new_weight = new_weight[:, np.newaxis] - student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), axis=1) + new_weight = teacher_w[teacher_index, :] + new_weight = new_weight[np.newaxis, :] + student_w = np.concatenate((student_w, add_noise(new_weight, student_w)), 
axis=0) student_b = np.append(student_b, add_noise(teacher_b[teacher_index], student_b)) - new_pre_layer = StubDense(n_units2 + n_add, layer.activation) + new_pre_layer = StubDense(layer.input_units, n_units2 + n_add) new_pre_layer.set_weights((student_w, student_b)) return new_pre_layer @@ -72,9 +71,8 @@ def wider_pre_dense(layer, n_add, weighted=True): def wider_pre_conv(layer, n_add_filters, weighted=True): if not weighted: - return StubConv(layer.filters + n_add_filters, kernel_size=layer.kernel_size, func=layer.func) + return StubConv(layer.input_channel, layer.filters + n_add_filters, kernel_size=layer.kernel_size) - pre_filter_shape = layer.kernel_size n_pre_filters = layer.filters rand = np.random.randint(n_pre_filters, size=n_add_filters) teacher_w, teacher_b = layer.get_weights() @@ -83,83 +81,72 @@ def wider_pre_conv(layer, n_add_filters, weighted=True): # target layer update (i) for i in range(len(rand)): teacher_index = rand[i] - new_weight = teacher_w[..., teacher_index] - new_weight = new_weight[..., np.newaxis] - student_w = np.concatenate((student_w, new_weight), axis=-1) + new_weight = teacher_w[teacher_index, ...] + new_weight = new_weight[np.newaxis, ...] + student_w = np.concatenate((student_w, new_weight), axis=0) student_b = np.append(student_b, teacher_b[teacher_index]) - new_pre_layer = StubConv(n_pre_filters + n_add_filters, kernel_size=pre_filter_shape, func=layer.func) + new_pre_layer = StubConv(layer.input_channel, n_pre_filters + n_add_filters, layer.kernel_size) new_pre_layer.set_weights((add_noise(student_w, teacher_w), add_noise(student_b, teacher_b))) return new_pre_layer def wider_next_conv(layer, start_dim, total_dim, n_add, weighted=True): if not weighted: - return StubConv(layer.filters, kernel_size=layer.kernel_size, func=layer.func) - filter_shape = layer.kernel_size + return StubConv(layer.input_channel + n_add, layer.filters, kernel_size=layer.kernel_size) n_filters = layer.filters teacher_w, teacher_b = layer.get_weights() new_weight_shape = list(teacher_w.shape) - new_weight_shape[-2] = n_add + new_weight_shape[1] = n_add new_weight = np.zeros(tuple(new_weight_shape)) - student_w = np.concatenate((teacher_w[..., :start_dim, :].copy(), + student_w = np.concatenate((teacher_w[:, :start_dim, ...].copy(), add_noise(new_weight, teacher_w), - teacher_w[..., start_dim:total_dim, :].copy()), axis=-2) - new_layer = StubConv(n_filters, kernel_size=filter_shape, func=layer.func) + teacher_w[:, start_dim:total_dim, ...].copy()), axis=1) + new_layer = StubConv(layer.input_channel + n_add, n_filters, layer.kernel_size) new_layer.set_weights((student_w, teacher_b)) return new_layer def wider_bn(layer, start_dim, total_dim, n_add, weighted=True): if not weighted: - return StubBatchNormalization() + return StubBatchNormalization(layer.num_features + n_add) weights = layer.get_weights() - new_weights = [np.ones(n_add, dtype=np.float32), - np.zeros(n_add, dtype=np.float32), - np.zeros(n_add, dtype=np.float32), - np.ones(n_add, dtype=np.float32)] + new_weights = [add_noise(np.ones(n_add, dtype=np.float32), np.array([0, 1])), + add_noise(np.zeros(n_add, dtype=np.float32), np.array([0, 1])), + add_noise(np.zeros(n_add, dtype=np.float32), np.array([0, 1])), + add_noise(np.ones(n_add, dtype=np.float32), np.array([0, 1]))] student_w = tuple() for weight, new_weight in zip(weights, new_weights): temp_w = weight.copy() temp_w = np.concatenate((temp_w[:start_dim], new_weight, temp_w[start_dim:total_dim])) student_w += (temp_w,) - new_layer = StubBatchNormalization() + 
new_layer = StubBatchNormalization(layer.num_features + n_add) new_layer.set_weights(student_w) return new_layer def wider_next_dense(layer, start_dim, total_dim, n_add, weighted=True): if not weighted: - return StubDense(layer.units, layer.activation) - n_units = layer.units + return StubDense(layer.input_units + n_add, layer.units) teacher_w, teacher_b = layer.get_weights() student_w = teacher_w.copy() n_units_each_channel = int(teacher_w.shape[0] / total_dim) - new_weight = np.zeros((n_add * n_units_each_channel, teacher_w.shape[1])) - student_w = np.concatenate((student_w[:start_dim * n_units_each_channel], + new_weight = np.zeros((teacher_w.shape[0], n_add * n_units_each_channel)) + student_w = np.concatenate((student_w[:, :start_dim * n_units_each_channel], add_noise(new_weight, student_w), - student_w[start_dim * n_units_each_channel:total_dim * n_units_each_channel])) + student_w[:, start_dim * n_units_each_channel:total_dim * n_units_each_channel]), + axis=1) - new_layer = StubDense(n_units, layer.activation) + new_layer = StubDense(layer.input_units + n_add, layer.units) new_layer.set_weights((student_w, teacher_b)) return new_layer -def wider_weighted_add(layer, n_add, weighted=True): - if not weighted: - return StubAdd() - - n_add += 0 - new_layer = StubAdd() - new_layer.set_weights(layer.get_weights()) - return new_layer - - def add_noise(weights, other_weights): w_range = np.ptp(other_weights.flatten()) noise_range = NOISE_RATIO * w_range diff --git a/autokeras/layers.py b/autokeras/layers.py index 70a2cb915..71967f5fb 100644 --- a/autokeras/layers.py +++ b/autokeras/layers.py @@ -1,8 +1,5 @@ -from keras.engine import InputLayer -from keras.layers import Add, Conv2D, Conv3D, Conv1D, Dense, BatchNormalization, Concatenate, Dropout, Activation, \ - Flatten, MaxPooling1D, MaxPooling2D, MaxPooling3D, GlobalAveragePooling1D, GlobalAveragePooling2D, \ - GlobalAveragePooling3D -from keras.regularizers import l2 +import torch +from torch import nn class StubLayer: @@ -11,7 +8,6 @@ def __init__(self, input_node=None, output_node=None): self.output = output_node self.weights = None self.input_shape = None - self.output_shape = None def build(self, shape): pass @@ -19,35 +15,71 @@ def build(self, shape): def set_weights(self, weights): self.weights = weights + def import_weights(self, torch_layer): + pass + + def export_weights(self, torch_layer): + pass + def get_weights(self): return self.weights + @property + def output_shape(self): + return self.input.shape -class StubBatchNormalization(StubLayer): - pass + +class StubWeightBiasLayer(StubLayer): + def import_weights(self, torch_layer): + self.set_weights((torch_layer.weight.data.numpy(), torch_layer.bias.data.numpy())) + + def export_weights(self, torch_layer): + torch_layer.weight.data = torch.Tensor(self.weights[0]) + torch_layer.bias.data = torch.Tensor(self.weights[1]) + + +class StubBatchNormalization(StubWeightBiasLayer): + def __init__(self, num_features, input_node=None, output_node=None): + super().__init__(input_node, output_node) + self.num_features = num_features + + def import_weights(self, torch_layer): + self.set_weights((torch_layer.weight.data.numpy(), + torch_layer.bias.data.numpy(), + torch_layer.running_mean.numpy(), + torch_layer.running_var.numpy(), + )) + + def export_weights(self, torch_layer): + torch_layer.weight.data = torch.Tensor(self.weights[0]) + torch_layer.bias.data = torch.Tensor(self.weights[1]) + torch_layer.running_mean = torch.Tensor(self.weights[2]) + torch_layer.running_var = 
torch.Tensor(self.weights[3]) -class StubDense(StubLayer): - def __init__(self, units, activation, input_node=None, output_node=None): +class StubDense(StubWeightBiasLayer): + def __init__(self, input_units, units, input_node=None, output_node=None): super().__init__(input_node, output_node) + self.input_units = input_units self.units = units - self.output_shape = (None, units) - self.activation = activation + @property + def output_shape(self): + return self.units, -class StubConv(StubLayer): - def __init__(self, filters, kernel_size, func, input_node=None, output_node=None): + +class StubConv(StubWeightBiasLayer): + def __init__(self, input_channel, filters, kernel_size, input_node=None, output_node=None): super().__init__(input_node, output_node) + self.input_channel = input_channel self.filters = filters - self.output_shape = (None, filters) self.kernel_size = kernel_size - self.func = func - if func is Conv1D: - self.n_dim = 1 - if func is Conv2D: - self.n_dim = 2 - if func is Conv3D: - self.n_dim = 3 + + @property + def output_shape(self): + ret = self.input.shape[:-1] + ret = ret + (self.filters,) + return ret class StubAggregateLayer(StubLayer): @@ -58,27 +90,50 @@ def __init__(self, input_nodes=None, output_node=None): class StubConcatenate(StubAggregateLayer): - pass + @property + def output_shape(self): + ret = 0 + for current_input in self.input: + ret += current_input.shape[-1] + ret = self.input[0].shape[:-1] + (ret,) + return ret class StubAdd(StubAggregateLayer): - pass + @property + def output_shape(self): + return self.input[0].shape class StubFlatten(StubLayer): + @property + def output_shape(self): + ret = 1 + for dim in self.input.shape: + ret *= dim + return ret, + + +class StubReLU(StubLayer): pass -class StubActivation(StubLayer): - def __init__(self, activation, input_node=None, output_node=None): - super().__init__(input_node, output_node) - self.activation = activation +class StubSoftmax(StubLayer): + pass class StubPooling(StubLayer): - def __init__(self, func, input_node=None, output_node=None): + def __init__(self, kernel_size=2, input_node=None, output_node=None): super().__init__(input_node, output_node) - self.func = func + self.kernel_size = kernel_size + + @property + def output_shape(self): + ret = tuple() + for dim in self.input.shape[:-1]: + ret = ret + (int(dim / self.kernel_size),) + ret = ret + (self.input.shape[-1],) + return ret class StubGlobalPooling(StubLayer): @@ -100,27 +155,29 @@ def __init__(self, input_node=None, output_node=None): def is_layer(layer, layer_type): if layer_type == 'Input': - return isinstance(layer, (InputLayer, StubInput)) + return isinstance(layer, StubInput) if layer_type == 'Conv': - return isinstance(layer, StubConv) or is_conv_layer(layer) + return isinstance(layer, StubConv) if layer_type == 'Dense': - return isinstance(layer, (StubDense, Dense)) + return isinstance(layer, (StubDense,)) if layer_type == 'BatchNormalization': - return isinstance(layer, (StubBatchNormalization, BatchNormalization)) + return isinstance(layer, (StubBatchNormalization,)) if layer_type == 'Concatenate': - return isinstance(layer, (StubConcatenate, Concatenate)) + return isinstance(layer, (StubConcatenate,)) if layer_type == 'Add': - return isinstance(layer, (StubAdd, Add)) + return isinstance(layer, (StubAdd,)) if layer_type == 'Pooling': - return isinstance(layer, StubPooling) or is_pooling_layer(layer) + return isinstance(layer, StubPooling) if layer_type == 'Dropout': - return isinstance(layer, (StubDropout, Dropout)) - if layer_type 
== 'Activation': - return isinstance(layer, (StubActivation, Activation)) + return isinstance(layer, (StubDropout,)) + if layer_type == 'Softmax': + return isinstance(layer, (StubSoftmax,)) + if layer_type == 'ReLU': + return isinstance(layer, (StubReLU,)) if layer_type == 'Flatten': - return isinstance(layer, (StubFlatten, Flatten)) + return isinstance(layer, (StubFlatten,)) if layer_type == 'GlobalAveragePooling': - return isinstance(layer, StubGlobalPooling) or is_global_pooling_layer(layer) + return isinstance(layer, StubGlobalPooling) def layer_width(layer): @@ -131,94 +188,50 @@ def layer_width(layer): raise TypeError('The layer should be either Dense or Conv layer.') -def is_pooling_layer(layer): - return isinstance(layer, (MaxPooling1D, MaxPooling2D, MaxPooling3D)) - - -def is_global_pooling_layer(layer): - return isinstance(layer, (GlobalAveragePooling1D, GlobalAveragePooling2D, GlobalAveragePooling3D)) +class TorchConcatenate(nn.Module): + def forward(self, input_list): + return torch.cat(input_list, dim=1) -def get_conv_layer_func(n_dim): - conv_layer_functions = [Conv1D, Conv2D, Conv3D] - if n_dim > 3: - raise ValueError('The input dimension is too high.') - if n_dim < 1: - raise ValueError('The input dimension is too low.') - return conv_layer_functions[n_dim - 1] +class TorchAdd(nn.Module): + def forward(self, input_list): + return input_list[0] + input_list[1] -def get_ave_layer_func(n_dim): - conv_layer_functions = [GlobalAveragePooling1D, GlobalAveragePooling2D, GlobalAveragePooling3D] - if n_dim > 3: - raise ValueError('The input dimension is too high.') - if n_dim < 1: - raise ValueError('The input dimension is too low.') - return conv_layer_functions[n_dim - 1] - - -def is_conv_layer(layer): - return isinstance(layer, tuple(CONV_FUNC_LIST)) - - -def is_dense_layer(layer): - return isinstance(layer, Dense) +class TorchFlatten(nn.Module): + def forward(self, input_tensor): + return input_tensor.view(input_tensor.size(0), -1) def to_real_layer(layer): if is_layer(layer, 'Dense'): - return Dense(layer.units, activation=layer.activation) + return torch.nn.Linear(layer.input_units, layer.units) if is_layer(layer, 'Conv'): - return Conv2D(layer.filters, - kernel_size=layer.kernel_size, - padding='same', - kernel_initializer='he_normal', - kernel_regularizer=l2(1e-4)) + return torch.nn.Conv2d(layer.input_channel, + layer.filters, + layer.kernel_size, + padding=layer.kernel_size / 2) if is_layer(layer, 'Pooling'): - return MaxPooling2D(padding='same') + return torch.nn.MaxPool2d(2) if is_layer(layer, 'BatchNormalization'): - return BatchNormalization() + return torch.nn.BatchNorm2d(layer.num_features) if is_layer(layer, 'Concatenate'): - return Concatenate() + return TorchConcatenate() if is_layer(layer, 'Add'): - return Add() + return TorchAdd() if is_layer(layer, 'Dropout'): - return Dropout(layer.rate) - if is_layer(layer, 'Activation'): - return Activation(layer.activation) + return torch.nn.Dropout2d(layer.rate) + if is_layer(layer, 'ReLU'): + return torch.nn.ReLU() + if is_layer(layer, 'Softmax'): + return torch.nn.Softmax() if is_layer(layer, 'Flatten'): - return Flatten() - if is_layer(layer, 'GlobalAveragePooling'): - return GlobalAveragePooling2D() - - -def to_stub_layer(layer, input_id, output_id): - if is_conv_layer(layer): - temp_stub_layer = StubConv(layer.filters, layer.kernel_size, layer.__class__, input_id, output_id) - elif is_layer(layer, 'Dense'): - temp_stub_layer = StubDense(layer.units, layer.activation, input_id, output_id) - elif is_layer(layer, 'Add'): 
- temp_stub_layer = StubAdd(input_id, output_id) - elif is_layer(layer, 'Concatenate'): - temp_stub_layer = StubConcatenate(input_id, output_id) - elif is_layer(layer, 'BatchNormalization'): - temp_stub_layer = StubBatchNormalization(input_id, output_id) - elif is_layer(layer, 'Activation'): - temp_stub_layer = StubActivation(layer.activation, input_id, output_id) - elif is_layer(layer, 'Input'): - temp_stub_layer = StubInput(input_id, output_id) - elif is_layer(layer, 'Flatten'): - temp_stub_layer = StubFlatten(input_id, output_id) - elif is_layer(layer, 'Dropout'): - temp_stub_layer = StubDropout(layer.rate, input_id, output_id) - elif is_layer(layer, 'Pooling'): - temp_stub_layer = StubPooling(layer.__class__, input_id, output_id) - elif is_layer(layer, 'GlobalAveragePooling'): - temp_stub_layer = StubGlobalPooling(layer.__class__, input_id, output_id) - else: - raise TypeError("The layer {} is illegal.".format(layer)) - return temp_stub_layer - - -CONV_FUNC_LIST = [Conv1D, Conv2D, Conv3D, StubConv] -WEIGHTED_LAYER_FUNC_LIST = CONV_FUNC_LIST + [Dense, StubDense] + return TorchFlatten() + + +def set_torch_weight_to_stub(torch_layer, stub_layer): + stub_layer.import_weights(torch_layer) + + +def set_stub_weight_to_torch(stub_layer, torch_layer): + stub_layer.export_weights(torch_layer) diff --git a/autokeras/net_transformer.py b/autokeras/net_transformer.py index b10ed8b45..57f055596 100644 --- a/autokeras/net_transformer.py +++ b/autokeras/net_transformer.py @@ -2,7 +2,7 @@ from random import randint, randrange from autokeras import constant -from autokeras.layers import is_conv_layer +from autokeras.layers import is_layer def to_wider_graph(graph): @@ -12,7 +12,7 @@ def to_wider_graph(graph): else: target_id = weighted_layer_ids[randint(0, len(weighted_layer_ids) - 1)] - if is_conv_layer(graph.layer_list[target_id]): + if is_layer(graph.layer_list[target_id], 'Conv'): n_add = graph.layer_list[target_id].filters else: n_add = graph.layer_list[target_id].units @@ -45,7 +45,7 @@ def to_skip_connection_graph(graph): def to_deeper_graph(graph): weighted_layer_ids = graph.deep_layer_ids() target_id = weighted_layer_ids[randint(0, len(weighted_layer_ids) - 1)] - if is_conv_layer(graph.layer_list[target_id]): + if is_layer(graph.layer_list[target_id], 'Conv'): graph.to_conv_deeper_model(target_id, randint(1, 2) * 2 + 1) else: graph.to_dense_deeper_model(target_id) @@ -70,7 +70,7 @@ def transform(graph): graphs.append(to_wider_graph(deepcopy(graph))) elif a == 2: graphs.append(to_skip_connection_graph(deepcopy(graph))) - return list(filter(lambda graph: legal_graph(graph), graphs)) + return list(filter(lambda x: legal_graph(x), graphs)) def default_transform(graph): @@ -79,7 +79,7 @@ def default_transform(graph): graph.to_conv_deeper_model(1, 3) graph.to_conv_deeper_model(6, 3) graph.to_conv_deeper_model(11, 3) - graph.to_add_skip_model(1, 18) - graph.to_add_skip_model(18, 26) - graph.to_add_skip_model(26, 30) + graph.to_add_skip_model(1, 19) + graph.to_add_skip_model(19, 27) + graph.to_add_skip_model(27, 31) return [graph] diff --git a/autokeras/preprocessor.py b/autokeras/preprocessor.py index f60ae497a..14462e07d 100644 --- a/autokeras/preprocessor.py +++ b/autokeras/preprocessor.py @@ -1,4 +1,10 @@ +import torch + import numpy as np +from torch.utils.data import Dataset +from torchvision.transforms import ToPILImage, RandomCrop, RandomHorizontalFlip, ToTensor, Normalize, Compose + +from autokeras import constant class OneHotEncoder: @@ -13,6 +19,7 @@ class OneHotEncoder: label_to_vec: 
mapping from label to vector int_to_label: mapping from int to label """ + def __init__(self): """Init OneHotEncoder""" self.data = None @@ -42,3 +49,45 @@ def transform(self, data): def inverse_transform(self, data): """Get label for every element in data""" return np.array(list(map(lambda x: self.int_to_label[x], np.argmax(np.array(data), axis=1)))) + + +class DataTransformer: + def __init__(self, data, augment=constant.DATA_AUGMENTATION): + self.mean = np.mean(data, axis=(0, 1, 2), keepdims=True).flatten() + self.std = np.std(data, axis=(0, 1, 2), keepdims=True).flatten() + self.augment = augment + + def transform_train(self, data, targets=None): + data = torch.Tensor(data.transpose(0, 3, 1, 2)) + if not self.augment: + augment_list = [] + else: + augment_list = [ToPILImage(), + RandomCrop(data.shape[2:], padding=4), + RandomHorizontalFlip(), + ToTensor()] + common_list = [Normalize(torch.Tensor(self.mean), torch.Tensor(self.std))] + data_transforms = Compose(augment_list + common_list) + return MultiTransformDataset(data, targets, data_transforms) + + def transform_test(self, data, targets=None): + data = torch.Tensor(data.transpose(0, 3, 1, 2)) + common_list = [Normalize(torch.Tensor(self.mean), torch.Tensor(self.std))] + data_transforms = Compose(common_list) + return MultiTransformDataset(data, targets, data_transforms) + + +class MultiTransformDataset(Dataset): + def __init__(self, dataset, target, compose): + self.dataset = dataset + self.target = target + self.compose = compose + + def __getitem__(self, index): + feature = self.dataset[index] + if self.target is None: + return self.compose(feature) + return self.compose(feature), self.target[index] + + def __len__(self): + return len(self.dataset) diff --git a/autokeras/search.py b/autokeras/search.py index 2ff41c4b6..99576a501 100644 --- a/autokeras/search.py +++ b/autokeras/search.py @@ -152,7 +152,7 @@ def init_search(self): if self.verbose: print('Initialization finished.') - def search(self, x_train, y_train, x_test, y_test): + def search(self, train_data, test_data): if not self.history: self.init_search() @@ -161,7 +161,7 @@ def search(self, x_train, y_train, x_test, y_test): if self.verbose: print('Training model ', model_id) pool = multiprocessing.Pool(1) - train_results = pool.map_async(train, [(graph, x_train, y_train, x_test, y_test, self.trainer_args, + train_results = pool.map_async(train, [(graph, train_data, test_data, self.trainer_args, os.path.join(self.path, str(model_id) + '.png'))]) # Do the search in current thread. 
@@ -298,14 +298,13 @@ def __lt__(self, other): def train(args): - graph, x_train, y_train, x_test, y_test, trainer_args, path = args + graph, train_data, test_data, trainer_args, path = args model = graph.produce_model() # if path is not None: # plot_model(model, to_file=path, show_shapes=True) loss, accuracy = ModelTrainer(model, - x_train, - y_train, - x_test, - y_test, + train_data, + test_data, False).train_model(**trainer_args) - return accuracy, loss, Graph(model, True) + model.set_weight_to_graph() + return accuracy, loss, model.graph diff --git a/autokeras/stub.py b/autokeras/stub.py deleted file mode 100644 index 4d9947d54..000000000 --- a/autokeras/stub.py +++ /dev/null @@ -1,50 +0,0 @@ -from autokeras.layers import to_stub_layer - - -class StubModel: - def __init__(self): - self.layers = [] - self.input_shape = None - self.inputs = [] - self.outputs = [] - - def add_layer(self, layer): - self.layers.append(layer) - - -def to_stub_model(model, weighted=False): - node_count = 0 - tensor_dict = {} - ret = StubModel() - ret.input_shape = model.input_shape - for layer in model.layers: - if isinstance(layer.input, list): - input_nodes = layer.input - else: - input_nodes = [layer.input] - - for node in input_nodes + [layer.output]: - if node not in tensor_dict: - tensor_dict[node] = StubTensor() - node_count += 1 - - if isinstance(layer.input, list): - input_id = [] - for node in layer.input: - input_id.append(tensor_dict[node]) - else: - input_id = tensor_dict[layer.input] - output_id = tensor_dict[layer.output] - - temp_stub_layer = to_stub_layer(layer, input_id, output_id) - if weighted: - temp_stub_layer.set_weights(layer.get_weights()) - ret.add_layer(temp_stub_layer) - ret.inputs = [tensor_dict[model.inputs[0]]] - ret.outputs = [tensor_dict[model.outputs[0]]] - return ret - - -class StubTensor: - def __init__(self, shape=None): - self.shape = shape diff --git a/autokeras/utils.py b/autokeras/utils.py index 6d372ddc1..03b042701 100644 --- a/autokeras/utils.py +++ b/autokeras/utils.py @@ -1,11 +1,8 @@ import os import pickle -import numpy as np +import torch -from keras.callbacks import Callback, LearningRateScheduler, ReduceLROnPlateau -from keras.losses import categorical_crossentropy -from keras.optimizers import Adam -from keras.preprocessing.image import ImageDataGenerator +from torch.utils.data import DataLoader from autokeras import constant @@ -28,7 +25,7 @@ def __init__(self, message): self.message = message -class EarlyStop(Callback): +class EarlyStop: def __init__(self, max_no_improvement_num=constant.MAX_NO_IMPROVEMENT_NUM, min_loss_dec=constant.MIN_LOSS_DEC): super().__init__() self.training_losses = [] @@ -45,13 +42,10 @@ def on_train_begin(self, logs=None): self._done = False self.minimum_loss = float('inf') - def on_epoch_end(self, batch, logs=None): - # self.max_accuracy = max(self.max_accuracy, logs.get('val_acc')) - self.max_accuracy = logs.get('val_acc') - loss = logs.get('val_loss') + def on_epoch_end(self, loss): self.training_losses.append(loss) if self._done and loss > (self.minimum_loss - self._min_loss_dec): - raise NoImprovementError('No improvement for {} epochs.'.format(self._max_no_improvement_num)) + return False if loss > (self.minimum_loss - self._min_loss_dec): self._no_improvement_count += 1 @@ -62,6 +56,8 @@ def on_epoch_end(self, batch, logs=None): if self._no_improvement_count > self._max_no_improvement_num: self._done = True + return True + class ModelTrainer: """A class that is used to train model @@ -77,21 +73,21 @@ class ModelTrainer: 
verbose: verbosity mode """ - def __init__(self, model, x_train, y_train, x_test, y_test, verbose): + def __init__(self, model, train_data, test_data, verbose): """Init ModelTrainer with model, x_train, y_train, x_test, y_test, verbose""" self.model = model - self.x_train = x_train.astype('float32') / 255 - self.y_train = y_train - self.x_test = x_test.astype('float32') / 255 - self.y_test = y_test self.verbose = verbose + self.train_data = train_data + self.test_data = test_data + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + self.criterion = torch.nn.CrossEntropyLoss() + self.optimizer = torch.optim.SGD(model.parameters(), lr=lr_schedule(0), momentum=0.9, weight_decay=5e-4) + self.early_stop = None def train_model(self, max_iter_num=constant.MAX_ITER_NUM, max_no_improvement_num=constant.MAX_NO_IMPROVEMENT_NUM, - batch_size=constant.MAX_BATCH_SIZE, - optimizer=None, - augment=constant.DATA_AUGMENTATION): + batch_size=constant.MAX_BATCH_SIZE): """Train the model. Args: @@ -101,73 +97,58 @@ def train_model(self, The training will stop when this number is reached. batch_size: An integer. The batch size during the training. optimizer: An optimizer class. - augment: A boolean of whether the data will be augmented. """ - if augment: - datagen = ImageDataGenerator( - # set input mean to 0 over the dataset - featurewise_center=False, - # set each sample mean to 0 - samplewise_center=False, - # divide inputs by std of dataset - featurewise_std_normalization=False, - # divide each input by its std - samplewise_std_normalization=False, - # apply ZCA whitening - zca_whitening=False, - # randomly rotate images in the range (deg 0 to 180) - rotation_range=0, - # randomly shift images horizontally - width_shift_range=0.1, - # randomly shift images vertically - height_shift_range=0.1, - # randomly flip images - horizontal_flip=True, - # randomly flip images - vertical_flip=False) - datagen.fit(self.x_train) - else: - datagen = None - if optimizer is None: - self.model.compile(loss=categorical_crossentropy, - optimizer=Adam(lr=lr_schedule(0)), - metrics=['accuracy']) - else: - self.model.compile(loss=categorical_crossentropy, - optimizer=optimizer(), - metrics=['accuracy']) - - batch_size = min(self.x_train.shape[0], batch_size) - terminator = EarlyStop(max_no_improvement_num=max_no_improvement_num) - lr_scheduler = LearningRateScheduler(lr_schedule) - - lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), - cooldown=0, - patience=5, - min_lr=0.5e-6) - - callbacks = [terminator, lr_scheduler, lr_reducer] - try: - if augment: - flow = datagen.flow(self.x_train, self.y_train, batch_size) - self.model.fit_generator(flow, - epochs=max_iter_num, - validation_data=(self.x_test, self.y_test), - callbacks=callbacks, - verbose=self.verbose) - else: - self.model.fit(self.x_train, self.y_train, - batch_size=batch_size, - epochs=max_iter_num, - validation_data=(self.x_test, self.y_test), - callbacks=callbacks, - verbose=self.verbose) - except NoImprovementError as e: - if self.verbose: - print('Training finished!') - print(e.message) - return terminator.minimum_loss, terminator.max_accuracy - return terminator.minimum_loss, terminator.max_accuracy + batch_size = min(len(self.train_data), batch_size) + + train_loader = DataLoader(self.train_data, batch_size=batch_size, shuffle=True) + test_loader = DataLoader(self.test_data, batch_size=batch_size, shuffle=True) + + self.early_stop = EarlyStop(max_no_improvement_num) + self.early_stop.on_train_begin() + + for epoch in range(max_iter_num): + 
self._train(train_loader) + test_loss = self._test(test_loader) + terminate = self.early_stop.on_epoch_end(test_loss) + if terminate: + break + + def _train(self, loader): + self.model.train() + train_loss = 0 + correct = 0 + total = 0 + for batch_idx, (inputs, targets) in enumerate(loader): + targets = targets.argmax(1) + inputs, targets = inputs.to(self.device), targets.to(self.device) + self.optimizer.zero_grad() + outputs = self.model(inputs) + loss = self.criterion(outputs, targets) + loss.backward() + self.optimizer.step() + + train_loss += loss.item() + _, predicted = outputs.max(1) + total += targets.size(0) + correct += predicted.eq(targets).sum().item() + + def _test(self, test_loader): + self.model.eval() + test_loss = 0 + correct = 0 + total = 0 + with torch.no_grad(): + for batch_idx, (inputs, targets) in enumerate(test_loader): + targets = targets.argmax(1) + inputs, targets = inputs.to(self.device), targets.to(self.device) + outputs = self.model(inputs) + loss = self.criterion(outputs, targets) + + test_loss += loss.item() + _, predicted = outputs.max(1) + total += targets.size(0) + correct += predicted.eq(targets).sum().item() + return test_loss def ensure_dir(directory): diff --git a/experiments/bn.py b/experiments/bn.py new file mode 100644 index 000000000..638ba4c61 --- /dev/null +++ b/experiments/bn.py @@ -0,0 +1,25 @@ +import numpy as np + +from keras import Sequential +from keras.layers import BatchNormalization + +bn = BatchNormalization(input_shape=(28, 28, 3), epsilon=0) +model = Sequential([bn]) +model.compile(optimizer='rmsprop', + loss='mse') + +n_filters = 3 +new_weights = [ + np.ones(n_filters, dtype=np.float32), + np.zeros(n_filters, dtype=np.float32), + np.zeros(n_filters, dtype=np.float32), + np.ones(n_filters, dtype=np.float32) +] +bn.set_weights(new_weights) + +x_train = np.random.rand(2, 28, 28, 3) +output = model.predict_on_batch(x_train) + +print(x_train.shape) +print(output.shape) +print(np.sum(np.abs(x_train - output))) diff --git a/experiments/default.py b/experiments/default.py new file mode 100644 index 000000000..6826e5ce1 --- /dev/null +++ b/experiments/default.py @@ -0,0 +1,30 @@ +from keras.datasets import cifar10 + +from autokeras.generator import DefaultClassifierGenerator +from autokeras.net_transformer import default_transform +from autokeras.preprocessor import OneHotEncoder +from autokeras.utils import ModelTrainer + + +if __name__ == '__main__': + (x_train, y_train), (x_test, y_test) = cifar10.load_data() + + print('Start Encoding') + encoder = OneHotEncoder() + encoder.fit(y_train) + + y_train = encoder.transform(y_train) + y_test = encoder.transform(y_test) + + print('Start Generating') + graphs = default_transform(DefaultClassifierGenerator(10, x_train.shape[1:]).generate()) + keras_model = graphs[0].produce_model() + + print('Start Training') + ModelTrainer(keras_model, + x_train, + y_train, + x_test, + y_test, + True).train_model(max_no_improvement_num=100, batch_size=128) + print(keras_model.evaluate(x_test, y_test, True)) diff --git a/experiments/pytorch_cifar10.py b/experiments/pytorch_cifar10.py new file mode 100644 index 000000000..2f0b56a80 --- /dev/null +++ b/experiments/pytorch_cifar10.py @@ -0,0 +1,141 @@ +'''Train CIFAR10 with PyTorch.''' +from __future__ import print_function + +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import torch.backends.cudnn as cudnn + +import torchvision +import torchvision.transforms as transforms + +import os +import argparse + +from 
models import * +from utils import progress_bar + + +parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training') +parser.add_argument('--lr', default=0.1, type=float, help='learning rate') +parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint') +args = parser.parse_args() + +device = 'cuda' if torch.cuda.is_available() else 'cpu' +best_acc = 0 # best test accuracy +start_epoch = 0 # start from epoch 0 or last checkpoint epoch + +# Data +print('==> Preparing data..') +transform_train = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), +]) + +transform_test = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), +]) + +trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) +trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2) + +testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test) +testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) + +classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') + +# Model +print('==> Building model..') +# net = VGG('VGG19') +net = ResNet18() +# net = PreActResNet18() +# net = GoogLeNet() +# net = DenseNet121() +# net = ResNeXt29_2x64d() +# net = MobileNet() +# net = MobileNetV2() +# net = DPN92() +# net = ShuffleNetG2() +# net = SENet18() +net = net.to(device) +if device == 'cuda': + net = torch.nn.DataParallel(net) + cudnn.benchmark = True + +if args.resume: + # Load checkpoint. + print('==> Resuming from checkpoint..') + assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!' + checkpoint = torch.load('./checkpoint/ckpt.t7') + net.load_state_dict(checkpoint['net']) + best_acc = checkpoint['acc'] + start_epoch = checkpoint['epoch'] + +criterion = nn.CrossEntropyLoss() +optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) + +# Training +def train(epoch): + print('\nEpoch: %d' % epoch) + net.train() + train_loss = 0 + correct = 0 + total = 0 + for batch_idx, (inputs, targets) in enumerate(trainloader): + inputs, targets = inputs.to(device), targets.to(device) + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, targets) + loss.backward() + optimizer.step() + + train_loss += loss.item() + _, predicted = outputs.max(1) + total += targets.size(0) + correct += predicted.eq(targets).sum().item() + + progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' + % (train_loss/(batch_idx+1), 100.*correct/total, correct, total)) + +def test(epoch): + global best_acc + net.eval() + test_loss = 0 + correct = 0 + total = 0 + with torch.no_grad(): + for batch_idx, (inputs, targets) in enumerate(testloader): + inputs, targets = inputs.to(device), targets.to(device) + outputs = net(inputs) + loss = criterion(outputs, targets) + + test_loss += loss.item() + _, predicted = outputs.max(1) + total += targets.size(0) + correct += predicted.eq(targets).sum().item() + + progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' + % (test_loss/(batch_idx+1), 100.*correct/total, correct, total)) + + # Save checkpoint. 
+ acc = 100.*correct/total + if acc > best_acc: + print('Saving..') + state = { + 'net': net.state_dict(), + 'acc': acc, + 'epoch': epoch, + } + if not os.path.isdir('checkpoint'): + os.mkdir('checkpoint') + torch.save(state, './checkpoint/ckpt.t7') + best_acc = acc + + +for epoch in range(start_epoch, start_epoch+200): + train(epoch) + test(epoch) \ No newline at end of file diff --git a/experiments/tc.py b/experiments/tc.py new file mode 100644 index 000000000..4d8d2fc0d --- /dev/null +++ b/experiments/tc.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +import torch + + +class TwoLayerNet(torch.nn.Module): + def __init__(self, D_in, H, D_out): + """ + In the constructor we instantiate two nn.Linear modules and assign them as + member variables. + """ + super(TwoLayerNet, self).__init__() + self.linear1 = torch.nn.Linear(D_in, H) + self.linear2 = torch.nn.Linear(H, D_out) + + def forward(self, x): + """ + In the forward function we accept a Tensor of input data and we must return + a Tensor of output data. We can use Modules defined in the constructor as + well as arbitrary operators on Tensors. + """ + h_relu = self.linear1(x).clamp(min=0) + y_pred = self.linear2(h_relu) + return y_pred + + +# N is batch size; D_in is input dimension; +# H is hidden dimension; D_out is output dimension. +N, D_in, H, D_out = 64, 1000, 100, 10 + +# Create random Tensors to hold inputs and outputs +x = torch.randn(N, D_in) +y = torch.randn(N, D_out) + +# Construct our model by instantiating the class defined above +model = TwoLayerNet(D_in, H, D_out) + +# Construct our loss function and an Optimizer. The call to model.parameters() +# in the SGD constructor will contain the learnable parameters of the two +# nn.Linear modules which are members of the model. +criterion = torch.nn.MSELoss(size_average=False) +optimizer = torch.optim.SGD(model.parameters(), lr=1e-4) +model.linear1.weight = torch.nn.Parameter(torch.zeros(H, D_in)) +model.linear1.bias = torch.nn.Parameter(torch.zeros(H)) +model.linear2.weight = torch.nn.Parameter(torch.zeros(D_out, H)) +model.linear2.bias = torch.nn.Parameter(torch.zeros(D_out)) +for t in range(500): + # Forward pass: Compute predicted y by passing x to the model + y_pred = model(x) + + # Compute and print loss + loss = criterion(y_pred, y) + print(t, loss.item()) + + # Zero gradients, perform a backward pass, and update the weights. 
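+    # PyTorch accumulates gradients across backward() calls by default, so they
+    # must be cleared explicitly before computing the gradients for this step.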
+ optimizer.zero_grad() + loss.backward() + optimizer.step() + +for param in model.linear1.parameters(): + print(param.data) + + +import numpy as np +a = np.ones(5) +b = torch.from_numpy(a) +np.add(a, 1, out=a) +print(a) +print(b) diff --git a/requirements.txt b/requirements.txt index a66b2eba4..4128e0e49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,8 @@ -absl-py==0.2.2 -astor==0.6.2 -atomicwrites==1.1.5 -attrs==18.1.0 -bleach==1.5.0 +torch==0.4.0 +torchvision==0.2.1 coverage==4.5.1 -gast==0.2.0 -grpcio==1.12.1 h5py==2.8.0 -html5lib==0.9999999 -Keras==2.1.6 -Keras-Applications==1.0.2 -Keras-Preprocessing==1.0.1 -Markdown==2.6.11 -more-itertools==4.2.0 numpy==1.14.5 -Pillow==5.1.0 -pluggy==0.6.0 -protobuf==3.6.0 py==1.5.4 pydot==1.2.4 pyparsing==2.2.0 @@ -24,8 +10,3 @@ pytest==3.6.2 pytest-cov==2.5.1 scikit-learn==0.19.1 scipy==1.1.0 -six==1.11.0 -tensorboard==1.8.0 -tensorflow==1.8.0 -termcolor==1.1.0 -Werkzeug==0.14.1 diff --git a/tests/common.py b/tests/common.py index 5c01a3dde..b21e45538 100644 --- a/tests/common.py +++ b/tests/common.py @@ -1,149 +1,213 @@ import os import numpy as np -from keras import Input -from keras.engine import Model -from keras.layers import Conv2D, BatchNormalization, Activation, Flatten, Dense, MaxPooling2D, Concatenate, Dropout, \ - GlobalAveragePooling2D, Add from autokeras import constant +from autokeras.graph import Graph +from autokeras.layers import StubReLU, StubConv, StubBatchNormalization, StubDropout, StubFlatten, StubSoftmax, \ + StubDense, StubConcatenate, StubAdd, StubPooling +from autokeras.preprocessor import DataTransformer def get_concat_skip_model(): - output_tensor = input_tensor = Input(shape=(5, 5, 3)) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - add_input = output_tensor - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Concatenate()([output_tensor, add_input]) - output_tensor = Conv2D(3, kernel_size=(1, 1), padding='same', activation='linear')(output_tensor) - add_input = output_tensor - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Concatenate()([output_tensor, add_input]) - output_tensor = Conv2D(3, kernel_size=(1, 1), padding='same', activation='linear')(output_tensor) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Flatten()(output_tensor) - output_tensor = Dense(5, 
activation='relu')(output_tensor) - output_tensor = Dropout(constant.DENSE_DROPOUT_RATE)(output_tensor) - output_tensor = Dense(5, activation='softmax')(output_tensor) - return Model(inputs=input_tensor, outputs=output_tensor) + graph = Graph((5, 5, 3), False) + output_node_id = 0 + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + temp_node_id = output_node_id + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubConcatenate(), [output_node_id, temp_node_id]) + output_node_id = graph.add_layer(StubConv(6, 3, 1), output_node_id) + + temp_node_id = output_node_id + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubConcatenate(), [output_node_id, temp_node_id]) + output_node_id = graph.add_layer(StubConv(6, 3, 1), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubFlatten(), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], 5), + output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(5, 5), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + graph.add_layer(StubSoftmax(), output_node_id) + + graph.produce_model().set_weight_to_graph() + + return graph def get_add_skip_model(): - output_tensor = input_tensor = Input(shape=(5, 5, 3)) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - add_input = output_tensor - output_tensor = 
Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Add()([output_tensor, add_input]) - add_input = output_tensor - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Add()([output_tensor, add_input]) - output_tensor = Flatten()(output_tensor) - output_tensor = Dense(5, activation='relu')(output_tensor) - output_tensor = Dropout(constant.DENSE_DROPOUT_RATE)(output_tensor) - output_tensor = Dense(5, activation='softmax')(output_tensor) - return Model(inputs=input_tensor, outputs=output_tensor) - - -def get_conv_model(): - output_tensor = input_tensor = Input(shape=(5, 5, 3)) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - output_tensor = GlobalAveragePooling2D()(output_tensor) - output_tensor = Dense(5, activation='relu')(output_tensor) - output_tensor = Dropout(constant.DENSE_DROPOUT_RATE)(output_tensor) - output_tensor = Dense(5, activation='softmax')(output_tensor) - return Model(inputs=input_tensor, outputs=output_tensor) + graph = Graph((5, 5, 3), False) + output_node_id = 0 + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + temp_node_id = output_node_id + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + temp_node_id = graph.add_layer(StubConv(3, 3, 1), temp_node_id) + output_node_id = graph.add_layer(StubAdd(), [output_node_id, temp_node_id]) + + temp_node_id = output_node_id + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + temp_node_id = graph.add_layer(StubConv(3, 3, 1), temp_node_id) + output_node_id = graph.add_layer(StubAdd(), [output_node_id, temp_node_id]) + + 
output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubFlatten(), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], 5), + output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(5, 5), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + graph.add_layer(StubSoftmax(), output_node_id) + + graph.produce_model().set_weight_to_graph() + + return graph def get_conv_data(): - return np.random.rand(1, 5, 5, 3) + return np.random.rand(1, 3, 5, 5) def get_conv_dense_model(): - output_tensor = input_tensor = Input(shape=(5, 5, 3)) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - output_tensor = GlobalAveragePooling2D()(output_tensor) - output_tensor = Dense(5, activation='relu')(output_tensor) - output_tensor = Dropout(constant.DENSE_DROPOUT_RATE)(output_tensor) - output_tensor = Dense(5, activation='softmax')(output_tensor) - return Model(inputs=input_tensor, outputs=output_tensor) + graph = Graph((5, 5, 3), False) + output_node_id = 0 + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubFlatten(), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], 5), + output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(5, 5), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + graph.add_layer(StubSoftmax(), output_node_id) + + graph.produce_model().set_weight_to_graph() + + return graph def get_pooling_model(): - output_tensor = input_tensor = Input(shape=(5, 5, 3)) - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', 
activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = MaxPooling2D(padding='same')(output_tensor) - - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Activation('relu')(output_tensor) - output_tensor = Conv2D(3, kernel_size=(3, 3), padding='same', activation='linear')(output_tensor) - output_tensor = BatchNormalization()(output_tensor) - output_tensor = Dropout(constant.CONV_DROPOUT_RATE)(output_tensor) - - output_tensor = Flatten()(output_tensor) - output_tensor = Dense(5, activation='relu')(output_tensor) - output_tensor = Dropout(constant.DENSE_DROPOUT_RATE)(output_tensor) - output_tensor = Dense(5, activation='softmax')(output_tensor) - return Model(inputs=input_tensor, outputs=output_tensor) + + graph = Graph((5, 5, 3), False) + output_node_id = 0 + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubPooling(2), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id) + output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubFlatten(), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], 5), + output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + + output_node_id = graph.add_layer(StubReLU(), output_node_id) + output_node_id = graph.add_layer(StubDense(5, 5), output_node_id) + output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id) + graph.add_layer(StubSoftmax(), output_node_id) + + graph.produce_model().set_weight_to_graph() + + return graph + + +def get_processed_data(): + x_train = np.random.rand(20, 28, 28, 3) + y_train = np.random.rand(20, 3) + x_test = np.random.rand(10, 28, 28, 3) + y_test = np.random.rand(10, 3) + data_transformer = DataTransformer(x_train) + train_data = data_transformer.transform_train(x_train, y_train) + test_data = data_transformer.transform_train(x_test, y_test) + return train_data, test_data def clean_dir(path): diff --git a/tests/test_bayesian.py 
b/tests/test_bayesian.py index be86d59c1..07376c32f 100644 --- a/tests/test_bayesian.py +++ b/tests/test_bayesian.py @@ -1,18 +1,27 @@ from autokeras.bayesian import * -from autokeras.graph import Graph -from tests.common import get_add_skip_model, get_concat_skip_model +from tests.common import get_add_skip_model, get_concat_skip_model, get_conv_dense_model def test_edit_distance(): - descriptor1 = Graph(get_add_skip_model()).extract_descriptor() - descriptor2 = Graph(get_concat_skip_model()).extract_descriptor() + descriptor1 = get_add_skip_model().extract_descriptor() + descriptor2 = get_concat_skip_model().extract_descriptor() assert edit_distance(descriptor1, descriptor2, 1.0) == 2.0 +def test_edit_distance2(): + descriptor1 = get_conv_dense_model().extract_descriptor() + graph = get_conv_dense_model() + graph.to_conv_deeper_model(1, 3) + graph.to_wider_model(5, 6) + graph.to_wider_model(17, 3) + descriptor2 = graph.extract_descriptor() + assert edit_distance(descriptor1, descriptor2, 1.0) == 1.5 + + def test_gpr(): gpr = IncrementalGaussianProcess(1.0) - gpr.first_fit([Graph(get_add_skip_model()).extract_descriptor()], [0.5]) + gpr.first_fit([get_add_skip_model().extract_descriptor()], [0.5]) assert gpr.first_fitted - gpr.incremental_fit([Graph(get_concat_skip_model()).extract_descriptor()], [0.6]) - assert abs(gpr.predict(np.array([Graph(get_concat_skip_model()).extract_descriptor()]))[0] - 0.6) < 1e-4 + gpr.incremental_fit([get_concat_skip_model().extract_descriptor()], [0.6]) + assert abs(gpr.predict(np.array([get_concat_skip_model().extract_descriptor()]))[0] - 0.6) < 1e-4 diff --git a/tests/test_classifier.py b/tests/test_classifier.py index b2762063e..a4aa64d0a 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -2,7 +2,6 @@ from unittest.mock import patch import pytest -from keras.models import load_model from autokeras.classifier import * from autokeras import constant @@ -38,29 +37,6 @@ def simple_transform(graph): return [deepcopy(graph), deepcopy(graph)] -@patch('multiprocessing.Pool', new=MockProcess) -@patch('autokeras.search.transform', side_effect=simple_transform) -@patch('autokeras.search.ModelTrainer.train_model', side_effect=mock_train) -def test_export(_, _1): - constant.MAX_ITER_NUM = 1 - constant.MAX_MODEL_NUM = 4 - constant.SEARCH_MAX_ITER = 1 - constant.DATA_AUGMENTATION = False - path = 'tests/resources/temp' - clean_dir(path) - clf = ImageClassifier(path=path, verbose=False) - train_x = np.random.rand(100, 25, 25, 1) - train_y = np.random.randint(0, 5, 100) - clf.fit(train_x, train_y) - # results = clf.predict(train_x) - # assert all(map(lambda result: result in train_y, results)) - clf.export_keras_model(os.path.join(path, 'model')) - load_model(os.path.join(path, 'model')) - clf.export_keras_model(os.path.join(path, 'model1'), 1) - load_model(os.path.join(path, 'model1')) - clean_dir(path) - - @patch('multiprocessing.Pool', new=MockProcess) @patch('autokeras.search.transform', side_effect=simple_transform) @patch('autokeras.search.ModelTrainer.train_model', side_effect=mock_train) diff --git a/tests/test_generator.py b/tests/test_generator.py index 291a877a2..3a694cdcd 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -1,15 +1,9 @@ -import numpy as np - from autokeras.generator import * - - -def test_random_classifier_generator(): - generator = RandomConvClassifierGenerator(3, (28, 28, 1)) - for i in range(3): - model = generator.generate() - model.predict_on_batch(np.random.rand(2, 28, 28, 1)) +from autokeras.graph 
import TorchModel def test_default_generator(): generator = DefaultClassifierGenerator(3, (28, 28, 1)) - generator.generate() + graph = generator.generate() + model = graph.produce_model() + assert isinstance(model, TorchModel) diff --git a/tests/test_graph.py b/tests/test_graph.py index d70855bd9..075f6c92e 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,18 +1,12 @@ from autokeras.generator import DefaultClassifierGenerator from autokeras.graph import * from autokeras.net_transformer import legal_graph -from tests.common import get_conv_model, get_conv_data, get_add_skip_model, get_conv_dense_model, get_pooling_model, \ +from tests.common import get_conv_data, get_add_skip_model, get_conv_dense_model, get_pooling_model, \ get_concat_skip_model -def test_graph(): - graph = Graph(get_conv_model()) - assert graph.n_nodes == 13 - - def test_conv_deeper_stub(): - model = get_conv_model() - graph = Graph(model, False) + graph = get_conv_dense_model() layer_num = graph.n_layers graph.to_conv_deeper_model(5, 3) @@ -20,43 +14,49 @@ def test_conv_deeper_stub(): def test_conv_deeper(): - model = get_conv_model() - graph = Graph(model, True) + graph = get_conv_dense_model() + model = graph.produce_model() + graph = deepcopy(graph) graph.to_conv_deeper_model(5, 3) new_model = graph.produce_model() - input_data = get_conv_data() + input_data = torch.Tensor(get_conv_data()) - output1 = model.predict_on_batch(input_data).flatten() - output2 = new_model.predict_on_batch(input_data).flatten() + model.eval() + new_model.eval() + output1 = model(input_data) + output2 = new_model(input_data) - assert np.sum(np.abs(output1 - output2)) < 4e-1 + assert (output1 - output2).abs().sum() < 1e-1 def test_dense_deeper_stub(): - model = get_conv_dense_model() - graph = Graph(model, False) + graph = get_conv_dense_model() + graph.weighted = False layer_num = graph.n_layers - graph.to_dense_deeper_model(5) + graph.to_dense_deeper_model(10) - assert graph.n_layers == layer_num + 2 + assert graph.n_layers == layer_num + 3 def test_dense_deeper(): - model = get_conv_dense_model() - graph = Graph(model, True) - graph.to_dense_deeper_model(5) + graph = get_conv_dense_model() + model = graph.produce_model() + graph = deepcopy(graph) + graph.to_dense_deeper_model(10) new_model = graph.produce_model() - input_data = get_conv_data() + input_data = torch.Tensor(get_conv_data()) - output1 = model.predict_on_batch(input_data).flatten() - output2 = new_model.predict_on_batch(input_data).flatten() + model.eval() + new_model.eval() + output1 = model(input_data) + output2 = new_model(input_data) - assert np.sum(np.abs(output1 - output2)) < 1e-4 + assert (output1 - output2).abs().sum() < 1e-4 def test_conv_wider_stub(): - model = get_add_skip_model() - graph = Graph(model, False) + graph = get_add_skip_model() + graph.weighted = False layer_num = graph.n_layers graph.to_wider_model(9, 3) @@ -64,43 +64,51 @@ def test_conv_wider_stub(): def test_conv_wider(): - model = get_concat_skip_model() - graph = Graph(model, True) + graph = get_concat_skip_model() + model = graph.produce_model() + graph = deepcopy(graph) graph.to_wider_model(5, 3) new_model = graph.produce_model() - input_data = get_conv_data() + input_data = torch.Tensor(get_conv_data()) + + model.eval() + new_model.eval() - output1 = model.predict_on_batch(input_data).flatten() - output2 = new_model.predict_on_batch(input_data).flatten() + output1 = model(input_data) + output2 = new_model(input_data) - assert np.sum(np.abs(output1 - output2)) < 4e-1 + assert 
(output1 - output2).abs().sum() < 1e-1 def test_dense_wider_stub(): - model = get_add_skip_model() - graph = Graph(model, False) + graph = get_add_skip_model() + graph.weighted = False layer_num = graph.n_layers - graph.to_wider_model(19, 3) + graph.to_wider_model(26, 3) assert graph.n_layers == layer_num def test_dense_wider(): - model = get_add_skip_model() - graph = Graph(model, True) - graph.to_wider_model(19, 3) + graph = get_add_skip_model() + model = graph.produce_model() + graph = deepcopy(graph) + graph.to_wider_model(26, 3) new_model = graph.produce_model() - input_data = get_conv_data() + input_data = torch.Tensor(get_conv_data()) - output1 = model.predict_on_batch(input_data).flatten() - output2 = new_model.predict_on_batch(input_data).flatten() + model.eval() + new_model.eval() - assert np.sum(np.abs(output1 - output2)) < 1e-4 + output1 = model(input_data) + output2 = new_model(input_data) + + assert (output1 - output2).abs().sum() < 1e-4 def test_skip_add_over_pooling_stub(): - model = get_pooling_model() - graph = Graph(model, False) + graph = get_pooling_model() + graph.weighted = False layer_num = graph.n_layers graph.to_add_skip_model(1, 10) @@ -108,21 +116,25 @@ def test_skip_add_over_pooling_stub(): def test_skip_add_over_pooling(): - model = get_pooling_model() - graph = Graph(model, True) + graph = get_pooling_model() + model = graph.produce_model() + graph = deepcopy(graph) graph.to_add_skip_model(1, 10) new_model = graph.produce_model() - input_data = get_conv_data() + input_data = torch.Tensor(get_conv_data()) + + model.eval() + new_model.eval() - output1 = model.predict_on_batch(input_data).flatten() - output2 = new_model.predict_on_batch(input_data).flatten() + output1 = model(input_data) + output2 = new_model(input_data) - assert np.sum(np.abs(output1 - output2)) < 1e-4 + assert (output1 - output2).abs().sum() < 1e-4 def test_skip_concat_over_pooling_stub(): - model = get_pooling_model() - graph = Graph(model, False) + graph = get_pooling_model() + graph.weighted = False layer_num = graph.n_layers graph.to_concat_skip_model(1, 14) @@ -130,31 +142,32 @@ def test_skip_concat_over_pooling_stub(): def test_skip_concat_over_pooling(): - model = get_pooling_model() - graph = Graph(model, True) + graph = get_pooling_model() + model = graph.produce_model() + graph = deepcopy(graph) graph.to_concat_skip_model(5, 10) graph.to_concat_skip_model(5, 10) - graph = Graph(graph.produce_model(), True) new_model = graph.produce_model() - input_data = get_conv_data() + input_data = torch.Tensor(get_conv_data()) - output1 = model.predict_on_batch(input_data).flatten() - output2 = new_model.predict_on_batch(input_data).flatten() + model.eval() + new_model.eval() - assert np.sum(np.abs(output1 - output2)) < 4e-1 + output1 = model(input_data) + output2 = new_model(input_data) + + assert (output1 - output2).abs().sum() < 1e-4 def test_extract_descriptor_add(): - model = get_add_skip_model() - descriptor = Graph(model).extract_descriptor() - assert descriptor.n_conv == 4 + descriptor = get_add_skip_model().extract_descriptor() + assert descriptor.n_conv == 5 assert descriptor.n_dense == 2 assert descriptor.skip_connections == [(2, 3, NetworkDescriptor.ADD_CONNECT), (3, 4, NetworkDescriptor.ADD_CONNECT)] def test_extract_descriptor_concat(): - model = get_concat_skip_model() - descriptor = Graph(model).extract_descriptor() + descriptor = get_concat_skip_model().extract_descriptor() assert descriptor.n_conv == 5 assert descriptor.n_dense == 2 assert descriptor.skip_connections == [(2, 3, 
NetworkDescriptor.CONCAT_CONNECT), @@ -162,21 +175,18 @@ def test_extract_descriptor_concat(): def test_deep_layer_ids(): - model = get_conv_dense_model() - graph = Graph(model, True) - assert len(graph.deep_layer_ids()) == 2 + graph = get_conv_dense_model() + assert len(graph.deep_layer_ids()) == 3 def test_wide_layer_ids(): - model = get_conv_dense_model() - graph = Graph(model, True) - assert len(graph.wide_layer_ids()) == 1 + graph = get_conv_dense_model() + assert len(graph.wide_layer_ids()) == 2 def test_skip_connection_layer_ids(): - model = get_conv_dense_model() - graph = Graph(model, True) - assert len(graph.skip_connection_layer_ids()) == 0 + graph = get_conv_dense_model() + assert len(graph.skip_connection_layer_ids()) == 1 def test_long_transform(): diff --git a/tests/test_layer_transformer.py b/tests/test_layer_transformer.py index 2c92411c5..713e6b208 100644 --- a/tests/test_layer_transformer.py +++ b/tests/test_layer_transformer.py @@ -1,23 +1,23 @@ +from autokeras.generator import DefaultClassifierGenerator from autokeras.layer_transformer import * -from autokeras.stub import to_stub_model -from tests.common import get_conv_model, get_add_skip_model, get_conv_dense_model +from tests.common import get_conv_dense_model def test_deeper_conv_block(): - model = to_stub_model(get_conv_model(), True) - layers = deeper_conv_block(model.layers[2], 3) + graph = DefaultClassifierGenerator(10, (28, 28, 3)).generate() + layers = deeper_conv_block(graph.layer_list[1], 3) assert len(layers) == constant.CONV_BLOCK_DISTANCE + 1 def test_dense_to_deeper_layer(): - a = StubDense(100, 'relu') - assert len(dense_to_deeper_block(a)) == 2 + a = StubDense(100, 100) + assert len(dense_to_deeper_block(a)) == 3 def test_dense_to_wider_layer(): - a = StubDense(5, 'relu') + a = StubDense(10, 5) a.set_weights((np.random.rand(10, 5), np.random.rand(5))) - b = StubDense(10, 'relu') + b = StubDense(5, 10) b.set_weights((np.random.rand(5, 10), np.random.rand(10))) assert isinstance(wider_pre_dense(a, 5), StubDense) @@ -25,7 +25,7 @@ def test_dense_to_wider_layer(): def test_wider_bn(): - bn_layer = StubBatchNormalization() + bn_layer = StubBatchNormalization(3) bn_layer.set_weights([np.ones(3, dtype=np.float32), np.zeros(3, dtype=np.float32), np.zeros(3, dtype=np.float32), @@ -34,24 +34,19 @@ def test_wider_bn(): assert new_bn_layer.get_weights()[0].shape[0] == 7 -def test_wider_weighted_add(): - layer = StubAdd() - layer.set_weights(get_add_skip_model().layers[13].get_weights()) - new_layer = wider_weighted_add(layer, 4) - assert isinstance(new_layer, StubAdd) - - def test_wider_next_dense(): - real_layer = get_conv_dense_model().layers[6] - layer = StubDense(real_layer.units, 'relu') + real_layer = get_conv_dense_model().layer_list[10] + layer = StubDense(real_layer.input_units, real_layer.units) layer.set_weights(real_layer.get_weights()) new_layer = wider_next_dense(layer, 3, 3, 3) - assert new_layer.get_weights()[0].shape == (6, 5) + assert new_layer.get_weights()[0].shape == (5, 6) def test_wider_conv(): - model = to_stub_model(get_conv_model(), True) + model = DefaultClassifierGenerator(10, (28, 28, 3)).generate().produce_model() + model.set_weight_to_graph() + graph = model.graph - assert isinstance(wider_pre_conv(model.layers[2], 3), StubConv) - assert isinstance(wider_bn(model.layers[3], 3, 3, 3), StubBatchNormalization) - assert isinstance(wider_next_conv(model.layers[6], 3, 3, 3), StubConv) + assert isinstance(wider_pre_conv(graph.layer_list[1], 3), StubConv) + assert 
isinstance(wider_bn(graph.layer_list[2], 3, 3, 3), StubBatchNormalization) + assert isinstance(wider_next_conv(graph.layer_list[6], 3, 3, 3), StubConv) diff --git a/tests/test_layers.py b/tests/test_layers.py index 6997de7ee..ebd0bbca9 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -1,17 +1,2 @@ -import os -import numpy as np -from keras import Input, Model -from keras.losses import mean_squared_error -from keras.models import load_model -from tensorflow.python.layers.utils import constant_value +# TODO: test produce output shape -from autokeras.layers import * -from tests.common import get_add_skip_model, clean_dir - - -def test_save_weighted_add(): - model = get_add_skip_model() - path = 'tests/resources/temp/m.h5' - model.save(path) - load_model(path) - os.remove(path) diff --git a/tests/test_net_transformer.py b/tests/test_net_transformer.py index 6222d6937..12701ac1c 100644 --- a/tests/test_net_transformer.py +++ b/tests/test_net_transformer.py @@ -1,37 +1,36 @@ from autokeras.generator import DefaultClassifierGenerator from autokeras.graph import Graph from autokeras.net_transformer import * -from autokeras.stub import to_stub_model from tests.common import get_conv_dense_model, get_pooling_model def test_wider(): - model = to_wider_graph(Graph(get_pooling_model(), False)) + model = to_wider_graph(get_pooling_model()) assert isinstance(model, Graph) def test_wider_dense(): - model = to_wider_graph(Graph(get_pooling_model(), False)) + model = to_wider_graph(get_pooling_model()) assert isinstance(model, Graph) def test_deeper(): - model = to_deeper_graph(Graph(get_conv_dense_model(), False)) + model = to_deeper_graph(get_conv_dense_model()) assert isinstance(model, Graph) def test_skip(): - model = to_skip_connection_graph(Graph(get_pooling_model(), False)) + model = to_skip_connection_graph(get_pooling_model()) assert isinstance(model, Graph) def test_transform(): - models = transform(Graph(get_pooling_model(), False)) + models = transform(get_pooling_model()) assert len(models) == constant.N_NEIGHBOURS def test_legal_graph(): - graph = Graph(get_pooling_model(), False) + graph = get_pooling_model() graph.to_add_skip_model(1, 5) assert legal_graph(graph) graph.to_add_skip_model(1, 5) @@ -39,7 +38,7 @@ def test_legal_graph(): def test_legal_graph2(): - graph = Graph(get_pooling_model(), False) + graph = get_pooling_model() graph.to_concat_skip_model(1, 5) assert legal_graph(graph) graph.to_concat_skip_model(1, 5) @@ -53,4 +52,4 @@ def test_default_transform(): # print(index, layer) graphs[0].produce_model() assert len(graphs) == 1 - assert len(graphs[0].layer_list) == 41 + assert len(graphs[0].layer_list) == 42 diff --git a/tests/test_search.py b/tests/test_search.py index 202db39e5..45ddebcb1 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -2,9 +2,8 @@ from unittest.mock import patch from autokeras.search import * -import numpy as np -from tests.common import clean_dir, MockProcess +from tests.common import clean_dir, MockProcess, get_processed_data default_test_path = 'tests/resources/temp' @@ -22,17 +21,13 @@ def mock_train(**_): @patch('autokeras.search.transform', side_effect=simple_transform) @patch('autokeras.search.ModelTrainer.train_model', side_effect=mock_train) def test_bayesian_searcher(_, _1): - x_train = np.random.rand(2, 28, 28, 1) - y_train = np.random.rand(2, 3) - x_test = np.random.rand(1, 28, 28, 1) - y_test = np.random.rand(1, 3) - + train_data, test_data = get_processed_data() clean_dir(default_test_path) - generator = 
BayesianSearcher(3, (28, 28, 1), verbose=False, path=default_test_path) + generator = BayesianSearcher(3, (28, 28, 3), verbose=False, path=default_test_path) constant.N_NEIGHBOURS = 1 constant.T_MIN = 0.8 for _ in range(2): - generator.search(x_train, y_train, x_test, y_test) + generator.search(train_data, test_data) clean_dir(default_test_path) assert len(generator.history) == 2 @@ -49,17 +44,14 @@ def test_search_tree(): @patch('autokeras.search.transform', side_effect=simple_transform) @patch('autokeras.search.ModelTrainer.train_model', side_effect=mock_train) def test_export_json(_, _1): - x_train = np.random.rand(2, 28, 28, 1) - y_train = np.random.rand(2, 3) - x_test = np.random.rand(1, 28, 28, 1) - y_test = np.random.rand(1, 3) + train_data, test_data = get_processed_data() clean_dir(default_test_path) - generator = BayesianSearcher(3, (28, 28, 1), verbose=False, path=default_test_path) + generator = BayesianSearcher(3, (28, 28, 3), verbose=False, path=default_test_path) constant.N_NEIGHBOURS = 1 constant.T_MIN = 0.8 for _ in range(3): - generator.search(x_train, y_train, x_test, y_test) + generator.search(train_data, test_data) file_path = os.path.join(default_test_path, 'test.json') generator.export_json(file_path) import json diff --git a/tests/test_stub.py b/tests/test_stub.py deleted file mode 100644 index 61f5722ea..000000000 --- a/tests/test_stub.py +++ /dev/null @@ -1,14 +0,0 @@ -from autokeras.stub import * -from tests.common import get_add_skip_model, get_concat_skip_model - - -def test_to_stub_model(): - model = get_add_skip_model() - stub_model = to_stub_model(model) - assert len(stub_model.layers) == 23 - - -def test_to_stub_model2(): - model = get_concat_skip_model() - stub_model = to_stub_model(model) - assert len(stub_model.layers) == 29 diff --git a/tests/test_utils.py b/tests/test_utils.py index 3562504ce..1745c0226 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,17 +1,10 @@ -from unittest.mock import patch -from autokeras.generator import RandomConvClassifierGenerator +from autokeras.generator import DefaultClassifierGenerator from autokeras.utils import * -import numpy as np - -def test_model_trainer(): - model = RandomConvClassifierGenerator(3, (28, 28, 1)).generate() - ModelTrainer(model, np.random.rand(2, 28, 28, 1), np.random.rand(2, 3), np.random.rand(1, 28, 28, 1), - np.random.rand(1, 3), False).train_model() +from tests.common import get_processed_data -def test_model_trainer_not_augmented(): - constant.DATA_AUGMENTATION = False - model = RandomConvClassifierGenerator(3, (28, 28, 1)).generate() - ModelTrainer(model, np.random.rand(2, 28, 28, 1), np.random.rand(2, 3), np.random.rand(1, 28, 28, 1), - np.random.rand(1, 3), False).train_model() +def test_model_trainer(): + model = DefaultClassifierGenerator(3, (28, 28, 3)).generate().produce_model() + train_data, test_data = get_processed_data() + ModelTrainer(model, train_data, test_data, False).train_model(max_iter_num=3)
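
The Keras-era call ModelTrainer(model, x_train, y_train, x_test, y_test, verbose) is replaced throughout this patch by ModelTrainer(model, train_data, test_data, verbose), where train_data and test_data come from the new DataTransformer rather than raw numpy arrays. A minimal end-to-end sketch of the reworked training path, assuming the DefaultClassifierGenerator, DataTransformer, and ModelTrainer APIs exactly as introduced above (toy random data stands in for a real dataset):

    import numpy as np

    from autokeras.generator import DefaultClassifierGenerator
    from autokeras.preprocessor import DataTransformer
    from autokeras.utils import ModelTrainer

    # Toy stand-in for image data: channels-last numpy arrays with one-hot
    # labels, mirroring tests/common.get_processed_data in this patch.
    x_train = np.random.rand(20, 28, 28, 3)
    y_train = np.eye(3)[np.random.randint(0, 3, 20)]
    x_test = np.random.rand(10, 28, 28, 3)
    y_test = np.eye(3)[np.random.randint(0, 3, 10)]

    # Build the default CNN graph and materialize it as a torch module.
    graph = DefaultClassifierGenerator(3, x_train.shape[1:]).generate()
    model = graph.produce_model()

    # DataTransformer wraps the arrays into the dataset objects that the
    # searcher and ModelTrainer now consume instead of raw arrays.
    data_transformer = DataTransformer(x_train)
    train_data = data_transformer.transform_train(x_train, y_train)
    test_data = data_transformer.transform_test(x_test, y_test)

    # New signature: (model, train_data, test_data, verbose).
    ModelTrainer(model, train_data, test_data, False).train_model(max_iter_num=3)

This mirrors tests/test_utils.py and tests/common.get_processed_data from the patch.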