From c81d4c2790e351524e741c52fd4746c3ea3e04fa Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 29 Dec 2016 13:25:22 +0100 Subject: [PATCH 01/57] flake8 linting --- elephas/hyperparam.py | 10 ++++++---- elephas/ml_model.py | 17 +++++++++++++---- elephas/spark_model.py | 10 +++++----- elephas/utils/functional_utils.py | 4 ++-- elephas/utils/rdd_utils.py | 4 ++-- 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index ad5cd73..7c1fc9f 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -8,6 +8,7 @@ # depend on hyperas, boto etc. is optional + class HyperParamModel(object): ''' HyperParamModel @@ -54,11 +55,11 @@ def best_models(self, nb_models, model, data, max_evals): trials_list = self.compute_trials(model, data, max_evals) num_trials = sum(len(trials) for trials in trials_list) if num_trials < nb_models: - nb_models = len(trials) + nb_models = len(trials_list) scores = [] for trials in trials_list: scores = scores + [trial.get('result').get('loss') for trial in trials] - cut_off = sorted(scores, reverse=True)[nb_models-1] + cut_off = sorted(scores, reverse=True)[nb_models - 1] model_list = [] for trials in trials_list: for trial in trials: @@ -68,6 +69,7 @@ def best_models(self, nb_models, model, data, max_evals): model_list.append(model) return model_list + class HyperasWorker(object): def __init__(self, bc_model, bc_max_evals): self.model_string = bc_model.value @@ -82,6 +84,6 @@ def minimize(self, dummy_iterator): random.seed(elem) rand_seed = np.random.randint(elem) - best_run = base_minimizer(model=None, data=None, algo=algo, max_evals=self.max_evals, - trials=trials, full_model_string=self.model_string, rseed=rand_seed) + base_minimizer(model=None, data=None, algo=algo, max_evals=self.max_evals, + trials=trials, full_model_string=self.model_string, rseed=rand_seed) yield trials diff --git a/elephas/ml_model.py b/elephas/ml_model.py index 0e7266e..aa697c2 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -2,18 +2,27 @@ import numpy as np -from pyspark.ml.param.shared import HasInputCol, HasOutputCol, HasFeaturesCol, HasLabelCol +from pyspark.ml.param.shared import HasOutputCol, HasFeaturesCol, HasLabelCol from pyspark.ml.util import keyword_only -from pyspark.sql import Row from pyspark.ml import Estimator, Model from pyspark.sql.types import StringType, DoubleType, StructField from keras.models import model_from_yaml from .spark_model import SparkModel -from .utils.rdd_utils import from_vector, to_vector +from .utils.rdd_utils import from_vector from .ml.adapter import df_to_simple_rdd -from .ml.params import * +from .ml.params import HasCategoricalLabels +from .ml.params import HasValidationSplit +from .ml.params import HasKerasModelConfig +from .ml.params import HasMode +from .ml.params import HasEpochs +from .ml.params import HasBatchSize +from .ml.params import HasFrequency +from .ml.params import HasVerbosity +from .ml.params import HasNumberOfClasses +from .ml.params import HasNumberOfWorkers +from .ml.params import HasOptimizerConfig from .optimizers import get diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 219b6a6..cf414c7 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -23,6 +23,7 @@ from keras.models import model_from_yaml + def get_server_weights(master_url='localhost:5000'): ''' Retrieve master weights from parameter server @@ -56,7 +57,6 @@ def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', freq custom_objects=None, 
*args, **kwargs): - self.spark_context = sc self._master_network = master_network if custom_objects is None: @@ -154,7 +154,8 @@ def update_parameters(): self.lock.acquire_write() constraints = self.master_network.constraints if len(constraints) == 0: - def empty(a): return a + def empty(a): + return a constraints = [empty for x in self.weights] self.weights = self.optimizer.get_updates(self.weights, constraints, delta) if self.mode == 'asynchronous': @@ -267,7 +268,6 @@ def __init__(self, yaml, train_config, frequency, master_url, master_optimizer, self.master_metrics = master_metrics self.custom_objects = custom_objects - def train(self, data_iterator): ''' Train a keras model on a worker and send asynchronous updates @@ -286,9 +286,9 @@ def train(self, data_iterator): nb_epoch = self.train_config['nb_epoch'] batch_size = self.train_config.get('batch_size') nb_train_sample = len(x_train[0]) - nb_batch = int(np.ceil(nb_train_sample/float(batch_size))) + nb_batch = int(np.ceil(nb_train_sample / float(batch_size))) index_array = np.arange(nb_train_sample) - batches = [(i*batch_size, min(nb_train_sample, (i+1)*batch_size)) for i in range(0, nb_batch)] + batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch)] if self.frequency == 'epoch': for epoch in range(nb_epoch): diff --git a/elephas/utils/functional_utils.py b/elephas/utils/functional_utils.py index f14db99..4bb8617 100644 --- a/elephas/utils/functional_utils.py +++ b/elephas/utils/functional_utils.py @@ -9,7 +9,7 @@ def add_params(p1, p2): ''' res = [] for x, y in zip(p1, p2): - res.append(x+y) + res.append(x + y) return res @@ -19,7 +19,7 @@ def subtract_params(p1, p2): ''' res = [] for x, y in zip(p1, p2): - res.append(x-y) + res.append(x - y) return res diff --git a/elephas/utils/rdd_utils.py b/elephas/utils/rdd_utils.py index a9989fe..d952cb6 100644 --- a/elephas/utils/rdd_utils.py +++ b/elephas/utils/rdd_utils.py @@ -38,7 +38,7 @@ def from_labeled_point(rdd, categorical=False, nb_classes=None): labels = np.asarray(rdd.map(lambda lp: lp.label).collect(), dtype='int32') if categorical: if not nb_classes: - nb_classes = np.max(labels)+1 + nb_classes = np.max(labels) + 1 temp = np.zeros((len(labels), nb_classes)) for i, label in enumerate(labels): temp[i, label] = 1. 
@@ -60,7 +60,7 @@ def lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=None): if categorical: if not nb_classes: labels = np.asarray(lp_rdd.map(lambda lp: lp.label).collect(), dtype='int32') - nb_classes = np.max(labels)+1 + nb_classes = np.max(labels) + 1 rdd = lp_rdd.map(lambda lp: (from_vector(lp.features), encode_label(lp.label, nb_classes))) else: rdd = lp_rdd.map(lambda lp: (from_vector(lp.features), lp.label)) From 3c63a53256dac8925aef0c741bd084632e548ca2 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 29 Dec 2016 13:40:47 +0100 Subject: [PATCH 02/57] python3 support --- elephas/hyperparam.py | 4 +++- elephas/ml/params.py | 1 + elephas/utils/functional_utils.py | 1 + elephas/utils/rdd_utils.py | 1 + elephas/utils/rwlock.py | 1 + examples/hyperparam_optimization.py | 1 + examples/ml_pipeline_otto.py | 1 + setup.py | 1 + 8 files changed, 10 insertions(+), 1 deletion(-) diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index 7c1fc9f..c682d7a 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -1,10 +1,12 @@ from __future__ import print_function +from __future__ import absolute_import from hyperopt import Trials, rand from hyperas.ensemble import VotingModel from hyperas.optim import get_hyperopt_model_string, base_minimizer import numpy as np from keras.models import model_from_yaml import six.moves.cPickle as pickle +from six.moves import range # depend on hyperas, boto etc. is optional @@ -79,7 +81,7 @@ def minimize(self, dummy_iterator): trials = Trials() algo = rand.suggest - elem = dummy_iterator.next() + elem = next(dummy_iterator) import random random.seed(elem) rand_seed = np.random.randint(elem) diff --git a/elephas/ml/params.py b/elephas/ml/params.py index e56fd8c..f7c2bca 100644 --- a/elephas/ml/params.py +++ b/elephas/ml/params.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import from pyspark.ml.param.shared import Param, Params diff --git a/elephas/utils/functional_utils.py b/elephas/utils/functional_utils.py index 4bb8617..c58e842 100644 --- a/elephas/utils/functional_utils.py +++ b/elephas/utils/functional_utils.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import numpy as np +from six.moves import zip def add_params(p1, p2): diff --git a/elephas/utils/rdd_utils.py b/elephas/utils/rdd_utils.py index d952cb6..087a968 100644 --- a/elephas/utils/rdd_utils.py +++ b/elephas/utils/rdd_utils.py @@ -4,6 +4,7 @@ import numpy as np from ..mllib.adapter import to_vector, from_vector +from six.moves import zip def to_simple_rdd(sc, features, labels): diff --git a/elephas/utils/rwlock.py b/elephas/utils/rwlock.py index 680081b..42fa069 100644 --- a/elephas/utils/rwlock.py +++ b/elephas/utils/rwlock.py @@ -2,6 +2,7 @@ Many readers can hold the lock XOR one and only one writer http://majid.info/blog/a-reader-writer-lock-for-python/ """ +from __future__ import absolute_import import threading version = """$Id: 04-1.html,v 1.3 2006/12/05 17:45:12 majid Exp $""" diff --git a/examples/hyperparam_optimization.py b/examples/hyperparam_optimization.py index 6a1e152..a00eb69 100644 --- a/examples/hyperparam_optimization.py +++ b/examples/hyperparam_optimization.py @@ -1,4 +1,5 @@ from __future__ import print_function +from __future__ import absolute_import from hyperopt import Trials, STATUS_OK, tpe from hyperas import optim diff --git a/examples/ml_pipeline_otto.py b/examples/ml_pipeline_otto.py index a5a4bdb..d093b1c 100644 --- a/examples/ml_pipeline_otto.py +++ b/examples/ml_pipeline_otto.py @@ -1,5 +1,6 @@ from __future__ import 
print_function +from __future__ import absolute_import from pyspark.mllib.linalg import Vectors import numpy as np import random diff --git a/setup.py b/setup.py index 4fb25c8..4ded57f 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import from setuptools import setup from setuptools import find_packages From 5c0c41660055cc8728353c7f6e78fd3f89002c02 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Sun, 11 Jun 2017 19:46:39 +0200 Subject: [PATCH 03/57] first draft of refactored parameter server --- elephas/hyperparam.py | 1 - elephas/parameter.py | 186 ++++++++++++++++++++++++++ elephas/parameter/__init__.py | 0 elephas/parameter/connector.py | 64 +++++++++ elephas/parameter/server.py | 161 +++++++++++++++++++++++ elephas/spark_model.py | 231 +++++---------------------------- elephas/utils/serialization.py | 11 ++ elephas/utils/sockets.py | 47 +++++++ elephas/worker.py | 106 +++++++++++++++ setup.py | 2 +- 10 files changed, 606 insertions(+), 203 deletions(-) create mode 100644 elephas/parameter.py create mode 100644 elephas/parameter/__init__.py create mode 100644 elephas/parameter/connector.py create mode 100644 elephas/parameter/server.py create mode 100644 elephas/utils/serialization.py create mode 100644 elephas/utils/sockets.py create mode 100644 elephas/worker.py diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index c682d7a..1b9aff0 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -7,7 +7,6 @@ from keras.models import model_from_yaml import six.moves.cPickle as pickle from six.moves import range - # depend on hyperas, boto etc. is optional diff --git a/elephas/parameter.py b/elephas/parameter.py new file mode 100644 index 0000000..88a0317 --- /dev/null +++ b/elephas/parameter.py @@ -0,0 +1,186 @@ +import socket +from threading import Lock, Thread + +from .utils.sockets import determine_master +from .utils.sockets import receive, send +from .utils.serialization import dict_to_model +from .utils.rwlock import RWLock + +import six.moves.cPickle as pickle +from flask import Flask, request +try: + import urllib.request as urllib2 +except ImportError: + import urllib2 +from multiprocessing import Process + + +class BaseParameterServer(object): + def __init__(self): + raise NotImplementedError + + def start(self): + raise NotImplementedError + + def stop(self): + raise NotImplementedError + + +class BaseParameterServerConnector(object): + def __init__(self): + raise NotImplementedError + + def update_parameters(self, delta): + raise NotImplementedError + + def get_parameters(self): + raise NotImplementedError + + +class HttpConnector(BaseParameterServerConnector): + def __init__(self): + self.master_url = determine_master() + + def get_parameters(self): + ''' + Retrieve master weights from parameter server + ''' + request = urllib2.Request('http://{0}/parameters'.format(self.master_url), + headers={'Content-Type': 'application/elephas'}) + ret = urllib2.urlopen(request).read() + weights = pickle.loads(ret) + return weights + + def update_parameters(self, delta): + '''Update master parameters with deltas from training process + ''' + request = urllib2.Request('http://{0}/update'.format(self.master_url), + pickle.dumps(delta, -1), headers={'Content-Type': 'application/elephas'}) + return urllib2.urlopen(request).read() + + +class HttpServer(BaseParameterServer): + + def __init__(self, master_network, optimizer, mode): + self.master_network = master_network + self.mode = mode + self.master_url = None + self.optimizer = 
optimizer + + self.lock = RWLock() + self.pickled_weights = None + self.weights = master_network.get_weights() + + def start(self): + '''Start parameter server''' + self.server = Process(target=self.start_flask_service) + self.server.start() + self.master_url = determine_master() + + def stop(self): + '''Terminate parameter server''' + self.server.terminate() + self.server.join() + + def start_flask_service(self): + '''Define service and run flask app''' + app = Flask(__name__) + self.app = app + + @app.route('/') + def home(): + return 'Elephas' + + @app.route('/parameters', methods=['GET']) + def handle_get_parameters(): + if self.mode == 'asynchronous': + self.lock.acquire_read() + self.pickled_weights = pickle.dumps(self.weights, -1) + pickled_weights = self.pickled_weights + if self.mode == 'asynchronous': + self.lock.release() + return pickled_weights + + @app.route('/update', methods=['POST']) + def handle_update_parameters(): + delta = pickle.loads(request.data) + if self.mode == 'asynchronous': + self.lock.acquire_write() + constraints = self.master_network.constraints + if len(constraints) == 0: + def empty(a): + return a + constraints = [empty for x in self.weights] + self.weights = self.optimizer.get_updates(self.weights, constraints, delta) + if self.mode == 'asynchronous': + self.lock.release() + return 'Update done' + + self.app.run(host='0.0.0.0', debug=True, + threaded=True, use_reloader=False) + + +class SocketServer(object): + def __init__(self, model, port): + self.model = dict_to_model(model) + self.port = port + self.socket = None + self.runs = False + self.connections = [] + self.lock = Lock() + + def start(self): + self.runs = True + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + sock.bind(('0.0.0.0', self.port)) + sock.listen(5) + self.socket = sock + + def stop(self): + self.runs = False + if self.socket: + for thread in self.connections: + thread.join() + del thread + self.socket.close() + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.connect(("localhost", self.port)) + sock.close() + except Exception: + pass + self.socket = None + self.connections = [] + + def update_parameters(self, socket): + data = receive(socket) + delta = data['delta'] + with self.lock: + weights = self.model.get_weights() + delta + self.model.set_weights(weights) + + def get_parameters(self, socket): + with self.lock: + weights = self.model.get_weights() + send(socket, weights) + + def action_listener(self, connection): + while self.runs: + get_or_update = connection.recv(1).decode() + if get_or_update == 'u': + self.set_parameters(connection) + elif get_or_update == 'g': + self.get_parameters(connection) + else: + print('Not a valid action') + + def run(self): + while self.runs: + try: + conn, addr = self.socket.accept() + thread = Thread(target=self.action_listener, args=(conn, addr)) + thread.start() + self.connections.append(thread) + except Exception: + pass diff --git a/elephas/parameter/__init__.py b/elephas/parameter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/elephas/parameter/connector.py b/elephas/parameter/connector.py new file mode 100644 index 0000000..1400cd0 --- /dev/null +++ b/elephas/parameter/connector.py @@ -0,0 +1,64 @@ +from __future__ import absolute_import +from __future__ import print_function + +import numpy as np +import socket +import six.moves.cPickle as pickle +try: + import urllib.request as urllib2 +except ImportError: + import urllib2 + 
+from ..utils.sockets import determine_master, send, receive + + +class BaseParameterServerConnector(object): + def __init__(self): + raise NotImplementedError + + def update_parameters(self, delta): + raise NotImplementedError + + def get_parameters(self): + raise NotImplementedError + + +class HttpConnector(BaseParameterServerConnector): + def __init__(self): + self.master_url = determine_master() + self.headers = {'Content-Type': 'application/elephas'} + + def get_parameters(self): + '''Retrieve master weights from parameter server + ''' + request = urllib2.Request('http://{0}/parameters'.format(self.master_url), + headers=self.headers) + pickled_weights = urllib2.urlopen(request).read() + return pickle.loads(pickled_weights) + + def update_parameters(self, delta): + '''Update master parameters with deltas from training process + ''' + request = urllib2.Request('http://{0}/update'.format(self.master_url), + pickle.dumps(delta, -1), headers=self.headers) + return urllib2.urlopen(request).read() + + +class SocketConnector(BaseParameterServerConnector): + def __init__(self, host='0.0.0.0', port=4000): + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.connect((host, port)) + + def get_parameters(self): + self.socket.sendall(b'g') + print('>>> Retrieving weights from socket') + return np.asarray(receive(self.socket)) + + def update_parameters(self, delta): + data = {} + # data['worker_id'] = self.get_worker_id() + data['delta'] = delta + self.socket.sendall(b'u') + print('>>> Start sending delta to socket') + send(self.socket, data) + print('>>> Done') diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py new file mode 100644 index 0000000..48ef843 --- /dev/null +++ b/elephas/parameter/server.py @@ -0,0 +1,161 @@ +import socket +from threading import Lock, Thread +import six.moves.cPickle as pickle +from flask import Flask, request +from multiprocessing import Process + +from ..utils.sockets import determine_master +from ..utils.sockets import receive, send +from ..utils.serialization import dict_to_model +from ..utils.rwlock import RWLock + + +class BaseParameterServer(object): + def __init__(self): + raise NotImplementedError + + def start(self): + raise NotImplementedError + + def stop(self): + raise NotImplementedError + + +class HttpServer(BaseParameterServer): + + def __init__(self, master_network, optimizer, mode): + self.master_network = master_network + self.mode = mode + self.master_url = None + self.optimizer = optimizer + + self.lock = RWLock() + self.pickled_weights = None + self.weights = master_network.get_weights() + + def start(self): + '''Start parameter server''' + self.server = Process(target=self.start_flask_service) + self.server.start() + self.master_url = determine_master() + + def stop(self): + '''Terminate parameter server''' + self.server.terminate() + self.server.join() + + def start_flask_service(self): + '''Define service and run flask app''' + app = Flask(__name__) + self.app = app + + @app.route('/') + def home(): + return 'Elephas' + + @app.route('/parameters', methods=['GET']) + def handle_get_parameters(): + if self.mode == 'asynchronous': + self.lock.acquire_read() + self.pickled_weights = pickle.dumps(self.weights, -1) + pickled_weights = self.pickled_weights + if self.mode == 'asynchronous': + self.lock.release() + return pickled_weights + + @app.route('/update', methods=['POST']) + def handle_update_parameters(): + delta = pickle.loads(request.data) + if self.mode == 'asynchronous': + 
self.lock.acquire_write() + constraints = self.master_network.constraints + if len(constraints) == 0: + def empty(a): + return a + constraints = [empty for x in self.weights] + self.weights = self.optimizer.get_updates(self.weights, constraints, delta) + if self.mode == 'asynchronous': + self.lock.release() + return 'Update done' + + self.app.run(host='0.0.0.0', debug=True, + threaded=True, use_reloader=False) + + +class SocketServer(object): + def __init__(self, model, port=4000): + self.model = dict_to_model(model) + self.port = port + self.socket = None + self.runs = False + self.connections = [] + self.lock = Lock() + self.thread = None + + def start(self): + if self.thread is not None: + self.stop() + self.thread = Thread(target=self.start_server) + self.thread.start() + + def stop(self): + self.stop_server() + self.thread.join() + self.thread = None + + def start_server(self): + self.runs = True + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + sock.bind(('0.0.0.0', self.port)) + sock.listen(5) + self.socket = sock + self.run() + + def stop_server(self): + self.runs = False + if self.socket: + for thread in self.connections: + thread.join() + del thread + self.socket.close() + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.connect(("localhost", self.port)) + sock.close() + except Exception: + pass + self.socket = None + self.connections = [] + + def update_parameters(self, socket): + data = receive(socket) + delta = data['delta'] + with self.lock: + weights = self.model.get_weights() + delta + self.model.set_weights(weights) + + def get_parameters(self, socket): + with self.lock: + weights = self.model.get_weights() + send(socket, weights) + + def action_listener(self, connection): + while self.runs: + get_or_update = connection.recv(1).decode() + if get_or_update == 'u': + self.set_parameters(connection) + elif get_or_update == 'g': + self.get_parameters(connection) + else: + print('Not a valid action') + + def run(self): + while self.runs: + try: + conn, addr = self.socket.accept() + thread = Thread(target=self.action_listener, args=(conn, addr)) + thread.start() + self.connections.append(thread) + except Exception: + pass diff --git a/elephas/spark_model.py b/elephas/spark_model.py index cf414c7..bd5b1f4 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -1,47 +1,15 @@ from __future__ import absolute_import from __future__ import print_function -import numpy as np -from itertools import tee -import socket -from multiprocessing import Process -import six.moves.cPickle as pickle -from six.moves import range -from flask import Flask, request -try: - import urllib.request as urllib2 -except ImportError: - import urllib2 - from pyspark.mllib.linalg import Matrix, Vector -from .utils.rwlock import RWLock -from .utils.functional_utils import subtract_params from .utils.rdd_utils import lp_to_simple_rdd +from .utils.serialization import model_to_dict from .mllib.adapter import to_matrix, from_matrix, to_vector, from_vector from .optimizers import SGD as default_optimizer - -from keras.models import model_from_yaml - - -def get_server_weights(master_url='localhost:5000'): - ''' - Retrieve master weights from parameter server - ''' - request = urllib2.Request('http://{0}/parameters'.format(master_url), - headers={'Content-Type': 'application/elephas'}) - ret = urllib2.urlopen(request).read() - weights = pickle.loads(ret) - return weights - - -def put_deltas_to_server(delta, 
master_url='localhost:5000'): - ''' - Update master parameters with deltas from training process - ''' - request = urllib2.Request('http://{0}/update'.format(master_url), - pickle.dumps(delta, -1), headers={'Content-Type': 'application/elephas'}) - return urllib2.urlopen(request).read() +from .worker import AsynchronousSparkWorker, SparkWorker +from .parameter.server import HttpServer, SocketServer +from .parameter.connector import HttpConnector, SocketConnector class SparkModel(object): @@ -49,12 +17,15 @@ class SparkModel(object): SparkModel is the main abstraction of elephas. Every other model should inherit from it. ''' - def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', frequency='epoch', + # TODO: Eliminate Spark context (only used for first broadcast, can be extracted) + def __init__(self, sc, master_network, optimizer=None, + mode='asynchronous', frequency='epoch', num_workers=4, - master_optimizer="adam", + master_optimizer="sgd", # TODO: other default master_loss="categorical_crossentropy", master_metrics=None, custom_objects=None, + parameter_server='http', *args, **kwargs): self.spark_context = sc @@ -72,19 +43,18 @@ def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', freq self.num_workers = num_workers self.weights = master_network.get_weights() self.pickled_weights = None - self.lock = RWLock() self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics self.custom_objects = custom_objects - @staticmethod - def determine_master(): - ''' - Get URL of parameter server, running on master - ''' - master_url = socket.gethostbyname(socket.gethostname()) + ':5000' - return master_url + # TODO: connector has to be initialized on workers + if parameter_server == 'http': + self.parameter_server = HttpServer(self.master_network, self.optimizer, self.mode) + self.connector = HttpConnector() + else: + self.parameter_server = SocketServer(model_to_dict(self.master_network)) + self.connector = SocketConnector() def get_train_config(self, nb_epoch, batch_size, verbose, validation_split): @@ -110,92 +80,48 @@ def get_config(self): @property def master_network(self): - ''' Get master network ''' return self._master_network @master_network.setter def master_network(self, network): - ''' Set master network ''' self._master_network = network def start_server(self): - ''' Start parameter server''' - self.server = Process(target=self.start_service) - self.server.start() + self.parameter_server.start() def stop_server(self): - ''' Terminate parameter server''' - self.server.terminate() - self.server.join() - - def start_service(self): - ''' Define service and run flask app''' - app = Flask(__name__) - self.app = app - - @app.route('/') - def home(): - return 'Elephas' - - @app.route('/parameters', methods=['GET']) - def get_parameters(): - if self.mode == 'asynchronous': - self.lock.acquire_read() - self.pickled_weights = pickle.dumps(self.weights, -1) - pickled_weights = self.pickled_weights - if self.mode == 'asynchronous': - self.lock.release() - return pickled_weights - - @app.route('/update', methods=['POST']) - def update_parameters(): - delta = pickle.loads(request.data) - if self.mode == 'asynchronous': - self.lock.acquire_write() - constraints = self.master_network.constraints - if len(constraints) == 0: - def empty(a): - return a - constraints = [empty for x in self.weights] - self.weights = self.optimizer.get_updates(self.weights, constraints, delta) - if self.mode == 'asynchronous': - 
self.lock.release() - return 'Update done' - - self.app.run(host='0.0.0.0', debug=True, - threaded=True, use_reloader=False) + self.parameter_server.stop() def predict(self, data): - ''' - Get prediction probabilities for a numpy array of features + '''Get prediction probabilities for a numpy array of features ''' return self.master_network.predict(data) def predict_classes(self, data): - ''' - Predict classes for a numpy array of features + '''Predict classes for a numpy array of features ''' return self.master_network.predict_classes(data) def train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1): - ''' - Train an elephas model. + # TODO: Make dataframe the standard, but support RDDs as well + '''Train an elephas model. ''' rdd = rdd.repartition(self.num_workers) - master_url = self.determine_master() if self.mode in ['asynchronous', 'synchronous', 'hogwild']: - self._train(rdd, nb_epoch, batch_size, verbose, validation_split, master_url) + self._train(rdd, nb_epoch, batch_size, verbose, validation_split) else: - print("""Choose from one of the modes: asynchronous, synchronous or hogwild""") + raise Exception("""Choose from one of the modes: asynchronous, synchronous or hogwild""") def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, - validation_split=0.1, master_url='localhost:5000'): + validation_split=0.1): ''' Protected train method to make wrapping of modes easier ''' - self.master_network.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + self.master_network.compile(optimizer=self.master_optimizer, + loss=self.master_loss, + metrics=self.master_metrics) if self.mode in ['asynchronous', 'hogwild']: self.start_server() yaml = self.master_network.to_yaml() @@ -203,11 +129,11 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, verbose, validation_split) if self.mode in ['asynchronous', 'hogwild']: worker = AsynchronousSparkWorker( - yaml, train_config, self.frequency, master_url, + yaml, self.connector, train_config, self.frequency, self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects ) rdd.mapPartitions(worker.train).collect() - new_parameters = get_server_weights(master_url) + new_parameters = self.connector.get_parameters() elif self.mode == 'synchronous': init = self.master_network.get_weights() parameters = self.spark_context.broadcast(init) @@ -222,103 +148,6 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, self.stop_server() -class SparkWorker(object): - ''' - Synchronous Spark worker. This code will be executed on workers. 
- ''' - def __init__(self, yaml, parameters, train_config, master_optimizer, master_loss, master_metrics, custom_objects): - self.yaml = yaml - self.parameters = parameters - self.train_config = train_config - self.master_optimizer = master_optimizer - self.master_loss = master_loss - self.master_metrics = master_metrics - self.custom_objects = custom_objects - - def train(self, data_iterator): - ''' - Train a keras model on a worker - ''' - feature_iterator, label_iterator = tee(data_iterator, 2) - x_train = np.asarray([x for x, y in feature_iterator]) - y_train = np.asarray([y for x, y in label_iterator]) - - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) - model.set_weights(self.parameters.value) - weights_before_training = model.get_weights() - if x_train.shape[0] > self.train_config.get('batch_size'): - model.fit(x_train, y_train, **self.train_config) - weights_after_training = model.get_weights() - deltas = subtract_params(weights_before_training, weights_after_training) - yield deltas - - -class AsynchronousSparkWorker(object): - ''' - Asynchronous Spark worker. This code will be executed on workers. - ''' - def __init__(self, yaml, train_config, frequency, master_url, master_optimizer, master_loss, master_metrics, custom_objects): - self.yaml = yaml - self.train_config = train_config - self.frequency = frequency - self.master_url = master_url - self.master_optimizer = master_optimizer - self.master_loss = master_loss - self.master_metrics = master_metrics - self.custom_objects = custom_objects - - def train(self, data_iterator): - ''' - Train a keras model on a worker and send asynchronous updates - to parameter server - ''' - feature_iterator, label_iterator = tee(data_iterator, 2) - x_train = np.asarray([x for x, y in feature_iterator]) - y_train = np.asarray([y for x, y in label_iterator]) - - if x_train.size == 0: - return - - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) - - nb_epoch = self.train_config['nb_epoch'] - batch_size = self.train_config.get('batch_size') - nb_train_sample = len(x_train[0]) - nb_batch = int(np.ceil(nb_train_sample / float(batch_size))) - index_array = np.arange(nb_train_sample) - batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch)] - - if self.frequency == 'epoch': - for epoch in range(nb_epoch): - weights_before_training = get_server_weights(self.master_url) - model.set_weights(weights_before_training) - self.train_config['nb_epoch'] = 1 - if x_train.shape[0] > batch_size: - model.fit(x_train, y_train, **self.train_config) - weights_after_training = model.get_weights() - deltas = subtract_params(weights_before_training, weights_after_training) - put_deltas_to_server(deltas, self.master_url) - elif self.frequency == 'batch': - from keras.engine.training import slice_X - for epoch in range(nb_epoch): - if x_train.shape[0] > batch_size: - for (batch_start, batch_end) in batches: - weights_before_training = get_server_weights(self.master_url) - model.set_weights(weights_before_training) - batch_ids = index_array[batch_start:batch_end] - X = slice_X(x_train, batch_ids) - y = slice_X(y_train, batch_ids) - model.train_on_batch(X, y) - weights_after_training = model.get_weights() - deltas = subtract_params(weights_before_training, weights_after_training) - put_deltas_to_server(deltas, 
self.master_url) - else: - print('Choose frequency to be either batch or epoch') - yield [] - - class SparkMLlibModel(SparkModel): ''' MLlib model takes RDDs of LabeledPoints. Internally we just convert diff --git a/elephas/utils/serialization.py b/elephas/utils/serialization.py new file mode 100644 index 0000000..c83c233 --- /dev/null +++ b/elephas/utils/serialization.py @@ -0,0 +1,11 @@ +from keras.models import model_from_json + + +def model_to_dict(model): + return dict(model=model.to_json(), weights=model.get_weights()) + + +def dict_to_model(dict): + model = model_from_json(dict['model']) + model.set_weights(dict['weights']) + return model diff --git a/elephas/utils/sockets.py b/elephas/utils/sockets.py new file mode 100644 index 0000000..1f85c37 --- /dev/null +++ b/elephas/utils/sockets.py @@ -0,0 +1,47 @@ +from six.moves import cPickle as pickle +from socket import gethostbyname, gethostname + + +def determine_master(port=':5000'): + return gethostbyname(gethostname()) + port + + +def receive_all(socket, num_bytes): + """Reads `num_bytes` bytes from the specified socket. + # Arguments + socket: Open socket. + num_bytes: Number of bytes to read. + """ + buffer = '' + buffer_size = 0 + bytes_left = num_bytes + while buffer_size < num_bytes: + data = socket.recv(bytes_left) + delta = len(data) + buffer_size += delta + bytes_left -= delta + buffer += data + return buffer + + +def receive(socket, num_bytes=20): + """Fetch data frame from open socket + # Arguments + socket: Open socket. + num_bytes: Number of bytes to read. + """ + length = int(receive_all(socket, num_bytes).decode()) + serialized_data = receive_all(socket, length) + return pickle.loads(serialized_data) + + +def send(socket, data, num_bytes=20): + """Send data to specified socket. + # Arguments + socket: socket. Opened socket. + data: any. Data to send. + """ + pickled_data = pickle.dumps(data, -1) + length = str(len(pickled_data)).zfill(num_bytes) + socket.sendall(length.encode()) + socket.sendall(pickled_data) diff --git a/elephas/worker.py b/elephas/worker.py new file mode 100644 index 0000000..5c26cd8 --- /dev/null +++ b/elephas/worker.py @@ -0,0 +1,106 @@ +import numpy as np +from itertools import tee + +from .utils.functional_utils import subtract_params +from keras.models import model_from_yaml +from .parameter.connector import SocketConnector + + +class SparkWorker(object): + '''Synchronous Spark worker. This code will be executed on workers. 
+ ''' + def __init__(self, yaml, parameters, train_config, master_optimizer, + master_loss, master_metrics, custom_objects): + self.yaml = yaml + self.parameters = parameters + self.train_config = train_config + self.master_optimizer = master_optimizer + self.master_loss = master_loss + self.master_metrics = master_metrics + self.custom_objects = custom_objects + + def train(self, data_iterator): + ''' + Train a keras model on a worker + ''' + feature_iterator, label_iterator = tee(data_iterator, 2) + x_train = np.asarray([x for x, y in feature_iterator]) + y_train = np.asarray([y for x, y in label_iterator]) + + model = model_from_yaml(self.yaml, self.custom_objects) + model.compile(optimizer=self.master_optimizer, + loss=self.master_loss, + metrics=self.master_metrics) + model.set_weights(self.parameters.value) + weights_before_training = model.get_weights() + if x_train.shape[0] > self.train_config.get('batch_size'): + model.fit(x_train, y_train, **self.train_config) + weights_after_training = model.get_weights() + deltas = subtract_params(weights_before_training, weights_after_training) + yield deltas + + +class AsynchronousSparkWorker(object): + ''' + Asynchronous Spark worker. This code will be executed on workers. + ''' + def __init__(self, yaml, ps_connector, train_config, frequency, master_optimizer, master_loss, master_metrics, custom_objects): + self.yaml = yaml + self.train_config = train_config + self.frequency = frequency + self.master_optimizer = master_optimizer + self.master_loss = master_loss + self.master_metrics = master_metrics + self.custom_objects = custom_objects + + def train(self, data_iterator): + ''' + Train a keras model on a worker and send asynchronous updates + to parameter server + ''' + feature_iterator, label_iterator = tee(data_iterator, 2) + x_train = np.asarray([x for x, y in feature_iterator]) + y_train = np.asarray([y for x, y in label_iterator]) + + if x_train.size == 0: + return + + model = model_from_yaml(self.yaml, self.custom_objects) + model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + + nb_epoch = self.train_config['nb_epoch'] + batch_size = self.train_config.get('batch_size') + nb_train_sample = x_train.shape[0] + nb_batch = int(np.ceil(nb_train_sample / float(batch_size))) + index_array = np.arange(nb_train_sample) + batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch)] + + self.connector = SocketConnector() + + if self.frequency == 'epoch': + for epoch in range(nb_epoch): + weights_before_training = self.connector.get_parameters() + model.set_weights(weights_before_training) + self.train_config['nb_epoch'] = 1 + if x_train.shape[0] > batch_size: + model.fit(x_train, y_train, **self.train_config) + weights_after_training = model.get_weights() + deltas = subtract_params(weights_before_training, weights_after_training) + self.connector.update_parameters(deltas) + elif self.frequency == 'batch': + from keras.engine.training import slice_X + for epoch in range(nb_epoch): + if x_train.shape[0] > batch_size: + for (batch_start, batch_end) in batches: + weights_before_training = self.connector.get_parameters() + model.set_weights(weights_before_training) + batch_ids = index_array[batch_start:batch_end] + X = slice_X(x_train, batch_ids) + y = slice_X(y_train, batch_ids) + model.train_on_batch(X, y) + weights_after_training = model.get_weights() + deltas = subtract_params(weights_before_training, weights_after_training) + 
self.connector.update_parameters(deltas) + else: + print('Choose frequency to be either batch or epoch') + yield [] diff --git a/setup.py b/setup.py index 4ded57f..652f4b7 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ download_url='https://github.com/maxpumperla/elephas/tarball/0.3', author='Max Pumperla', author_email='max.pumperla@googlemail.com', - install_requires=['keras', 'hyperas', 'flask'], + install_requires=['keras', 'hyperas', 'flask', 'six'], license='MIT', packages=find_packages(), zip_safe=False) From 4050984ea03d0028188c27b666ad273a3ece0b39 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 12 Jun 2017 05:59:45 +0200 Subject: [PATCH 04/57] finally init test setup --- elephas/worker.py | 10 +++++++--- tests/conftest.py | 23 +++++++++++++++++++++++ tests/{ => ml}/__init__.py | 0 tests/ml/test_adapter.py | 0 tests/ml/test_params.py | 0 tests/mllib/__init__.py | 0 tests/mllib/test_adapter.py | 0 tests/parameter/__init__.py | 0 tests/parameter/test_connector.py | 0 tests/parameter/test_server.py | 0 tests/test_hyperparam.py | 8 ++++++++ tests/test_ml_model.py | 0 tests/test_optimizers.py | 0 tests/test_parameter.py | 0 tests/test_spark_model.py | 0 tests/test_worker.py | 0 tests/utils/test_functional_utils.py | 0 tests/utils/test_rdd_utils.py | 0 tests/utils/test_rwlock.py | 0 tests/utils/test_serialization.py | 0 tests/utils/test_sockets.py | 0 21 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 tests/conftest.py rename tests/{ => ml}/__init__.py (100%) create mode 100644 tests/ml/test_adapter.py create mode 100644 tests/ml/test_params.py create mode 100644 tests/mllib/__init__.py create mode 100644 tests/mllib/test_adapter.py create mode 100644 tests/parameter/__init__.py create mode 100644 tests/parameter/test_connector.py create mode 100644 tests/parameter/test_server.py create mode 100644 tests/test_hyperparam.py create mode 100644 tests/test_ml_model.py create mode 100644 tests/test_optimizers.py create mode 100644 tests/test_parameter.py create mode 100644 tests/test_spark_model.py create mode 100644 tests/test_worker.py create mode 100644 tests/utils/test_functional_utils.py create mode 100644 tests/utils/test_rdd_utils.py create mode 100644 tests/utils/test_rwlock.py create mode 100644 tests/utils/test_serialization.py create mode 100644 tests/utils/test_sockets.py diff --git a/elephas/worker.py b/elephas/worker.py index 5c26cd8..9197825 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -44,7 +44,9 @@ class AsynchronousSparkWorker(object): ''' Asynchronous Spark worker. This code will be executed on workers. 
''' - def __init__(self, yaml, ps_connector, train_config, frequency, master_optimizer, master_loss, master_metrics, custom_objects): + def __init__(self, yaml, ps_connector, train_config, frequency, + master_optimizer, master_loss, master_metrics, + custom_objects): self.yaml = yaml self.train_config = train_config self.frequency = frequency @@ -73,8 +75,10 @@ def train(self, data_iterator): nb_train_sample = x_train.shape[0] nb_batch = int(np.ceil(nb_train_sample / float(batch_size))) index_array = np.arange(nb_train_sample) - batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch)] - + batches = [ + (i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) + for i in range(0, nb_batch) + ] self.connector = SocketConnector() if self.frequency == 'epoch': diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..dc0acd2 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,23 @@ +from pyspark import SparkContext, SparkConf +import pytest +import logging + + +def quiet_py4j(): + """ turn down spark logging for the test context """ + logger = logging.getLogger('py4j') + logger.setLevel(logging.WARN) + + +@pytest.fixture(scope="session") +def spark_context(request): + """ fixture for creating a spark context + Args: + request: pytest.FixtureRequest object + """ + conf = (SparkConf().setMaster("local[2]").setAppName("pytest-pyspark-local-testing")) + sc = SparkContext(conf=conf) + request.addfinalizer(lambda: sc.stop()) + + quiet_py4j() + return sc diff --git a/tests/__init__.py b/tests/ml/__init__.py similarity index 100% rename from tests/__init__.py rename to tests/ml/__init__.py diff --git a/tests/ml/test_adapter.py b/tests/ml/test_adapter.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/ml/test_params.py b/tests/ml/test_params.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mllib/__init__.py b/tests/mllib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mllib/test_adapter.py b/tests/mllib/test_adapter.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/parameter/__init__.py b/tests/parameter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/parameter/test_connector.py b/tests/parameter/test_connector.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/parameter/test_server.py b/tests/parameter/test_server.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_hyperparam.py b/tests/test_hyperparam.py new file mode 100644 index 0000000..f90c8d9 --- /dev/null +++ b/tests/test_hyperparam.py @@ -0,0 +1,8 @@ +import numpy as np +import pytest + +pytestmark = pytest.mark.usefixtures("spark_context") + + +def test_that_requires_sc(spark_context): + assert spark_context.parallelize(np.zeros((10, 10))).count() == 10 diff --git a/tests/test_ml_model.py b/tests/test_ml_model.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_parameter.py b/tests/test_parameter.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_spark_model.py b/tests/test_spark_model.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_worker.py b/tests/test_worker.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/test_functional_utils.py b/tests/utils/test_functional_utils.py new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/utils/test_rdd_utils.py b/tests/utils/test_rdd_utils.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/test_rwlock.py b/tests/utils/test_rwlock.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/test_serialization.py b/tests/utils/test_serialization.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/test_sockets.py b/tests/utils/test_sockets.py new file mode 100644 index 0000000..e69de29 From 835ed5494d2d2fb4321ed8e6d07ef545898b01f7 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 11:46:32 +0200 Subject: [PATCH 05/57] fix typo; --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d8bcf12..b8c58d7 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,7 @@ hyperparam_model.minimize(model=model, data=data, max_evals=5) ## Distributed training of ensemble models -Building on the last section, it is possible to train ensemble models with elephas by means of running hyper-parameter optimization on large search spaces and defining a resulting voting classifier on the top-n performing models. With ```data``` and ```model```` defined as above, this is a simple as running +Building on the last section, it is possible to train ensemble models with elephas by means of running hyper-parameter optimization on large search spaces and defining a resulting voting classifier on the top-n performing models. With ```data``` and ```model``` defined as above, this is a simple as running ```python result = hyperparam_model.best_ensemble(nb_ensemble_models=10, model=model, data=data, max_evals=5) From 2b3b4e2945125567aee652640644591418da8181 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 11:54:17 +0200 Subject: [PATCH 06/57] clean --- elephas/parameter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/elephas/parameter.py b/elephas/parameter.py index 88a0317..1cdf2cf 100644 --- a/elephas/parameter.py +++ b/elephas/parameter.py @@ -4,7 +4,7 @@ from .utils.sockets import determine_master from .utils.sockets import receive, send from .utils.serialization import dict_to_model -from .utils.rwlock import RWLock +from .utils.rwlock import RWLock as Lock import six.moves.cPickle as pickle from flask import Flask, request @@ -45,7 +45,7 @@ def get_parameters(self): ''' Retrieve master weights from parameter server ''' - request = urllib2.Request('http://{0}/parameters'.format(self.master_url), + request = urllib2.Request('http://{}/parameters'.format(self.master_url), headers={'Content-Type': 'application/elephas'}) ret = urllib2.urlopen(request).read() weights = pickle.loads(ret) @@ -54,7 +54,7 @@ def get_parameters(self): def update_parameters(self, delta): '''Update master parameters with deltas from training process ''' - request = urllib2.Request('http://{0}/update'.format(self.master_url), + request = urllib2.Request('http://{}/update'.format(self.master_url), pickle.dumps(delta, -1), headers={'Content-Type': 'application/elephas'}) return urllib2.urlopen(request).read() @@ -67,7 +67,7 @@ def __init__(self, master_network, optimizer, mode): self.master_url = None self.optimizer = optimizer - self.lock = RWLock() + self.lock = Lock() self.pickled_weights = None self.weights = master_network.get_weights() From b40ee32dda626c37c98e7b5bed3e95e3278cf8f5 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 11:56:10 +0200 Subject: [PATCH 07/57] pycharm --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/.gitignore b/.gitignore index b00c904..b25f561 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,5 @@ examples/.ipynb_checkpoints examples/metastore_db examples/*.csv + +.idea/ \ No newline at end of file From 1e934a55faa6cb4163183da3f18e744f7b344056 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 12:06:08 +0200 Subject: [PATCH 08/57] clean --- elephas/parameter.py | 186 ----------------------------------------- elephas/spark_model.py | 115 +------------------------ 2 files changed, 3 insertions(+), 298 deletions(-) delete mode 100644 elephas/parameter.py diff --git a/elephas/parameter.py b/elephas/parameter.py deleted file mode 100644 index 1cdf2cf..0000000 --- a/elephas/parameter.py +++ /dev/null @@ -1,186 +0,0 @@ -import socket -from threading import Lock, Thread - -from .utils.sockets import determine_master -from .utils.sockets import receive, send -from .utils.serialization import dict_to_model -from .utils.rwlock import RWLock as Lock - -import six.moves.cPickle as pickle -from flask import Flask, request -try: - import urllib.request as urllib2 -except ImportError: - import urllib2 -from multiprocessing import Process - - -class BaseParameterServer(object): - def __init__(self): - raise NotImplementedError - - def start(self): - raise NotImplementedError - - def stop(self): - raise NotImplementedError - - -class BaseParameterServerConnector(object): - def __init__(self): - raise NotImplementedError - - def update_parameters(self, delta): - raise NotImplementedError - - def get_parameters(self): - raise NotImplementedError - - -class HttpConnector(BaseParameterServerConnector): - def __init__(self): - self.master_url = determine_master() - - def get_parameters(self): - ''' - Retrieve master weights from parameter server - ''' - request = urllib2.Request('http://{}/parameters'.format(self.master_url), - headers={'Content-Type': 'application/elephas'}) - ret = urllib2.urlopen(request).read() - weights = pickle.loads(ret) - return weights - - def update_parameters(self, delta): - '''Update master parameters with deltas from training process - ''' - request = urllib2.Request('http://{}/update'.format(self.master_url), - pickle.dumps(delta, -1), headers={'Content-Type': 'application/elephas'}) - return urllib2.urlopen(request).read() - - -class HttpServer(BaseParameterServer): - - def __init__(self, master_network, optimizer, mode): - self.master_network = master_network - self.mode = mode - self.master_url = None - self.optimizer = optimizer - - self.lock = Lock() - self.pickled_weights = None - self.weights = master_network.get_weights() - - def start(self): - '''Start parameter server''' - self.server = Process(target=self.start_flask_service) - self.server.start() - self.master_url = determine_master() - - def stop(self): - '''Terminate parameter server''' - self.server.terminate() - self.server.join() - - def start_flask_service(self): - '''Define service and run flask app''' - app = Flask(__name__) - self.app = app - - @app.route('/') - def home(): - return 'Elephas' - - @app.route('/parameters', methods=['GET']) - def handle_get_parameters(): - if self.mode == 'asynchronous': - self.lock.acquire_read() - self.pickled_weights = pickle.dumps(self.weights, -1) - pickled_weights = self.pickled_weights - if self.mode == 'asynchronous': - self.lock.release() - return pickled_weights - - @app.route('/update', methods=['POST']) - def handle_update_parameters(): - delta = pickle.loads(request.data) - if self.mode == 'asynchronous': - self.lock.acquire_write() - 
constraints = self.master_network.constraints - if len(constraints) == 0: - def empty(a): - return a - constraints = [empty for x in self.weights] - self.weights = self.optimizer.get_updates(self.weights, constraints, delta) - if self.mode == 'asynchronous': - self.lock.release() - return 'Update done' - - self.app.run(host='0.0.0.0', debug=True, - threaded=True, use_reloader=False) - - -class SocketServer(object): - def __init__(self, model, port): - self.model = dict_to_model(model) - self.port = port - self.socket = None - self.runs = False - self.connections = [] - self.lock = Lock() - - def start(self): - self.runs = True - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - sock.bind(('0.0.0.0', self.port)) - sock.listen(5) - self.socket = sock - - def stop(self): - self.runs = False - if self.socket: - for thread in self.connections: - thread.join() - del thread - self.socket.close() - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - sock.connect(("localhost", self.port)) - sock.close() - except Exception: - pass - self.socket = None - self.connections = [] - - def update_parameters(self, socket): - data = receive(socket) - delta = data['delta'] - with self.lock: - weights = self.model.get_weights() + delta - self.model.set_weights(weights) - - def get_parameters(self, socket): - with self.lock: - weights = self.model.get_weights() - send(socket, weights) - - def action_listener(self, connection): - while self.runs: - get_or_update = connection.recv(1).decode() - if get_or_update == 'u': - self.set_parameters(connection) - elif get_or_update == 'g': - self.get_parameters(connection) - else: - print('Not a valid action') - - def run(self): - while self.runs: - try: - conn, addr = self.socket.accept() - thread = Thread(target=self.action_listener, args=(conn, addr)) - thread.start() - self.connections.append(thread) - except Exception: - pass diff --git a/elephas/spark_model.py b/elephas/spark_model.py index fefd124..0d56bec 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from __future__ import print_function -from pyspark.mllib.linalg import Matrix, Vector +import pyspark from .utils.rdd_utils import lp_to_simple_rdd from .utils.serialization import model_to_dict @@ -90,12 +90,7 @@ def start_server(self): self.parameter_server.start() def stop_server(self): -<<<<<<< HEAD self.parameter_server.stop() -======= - ''' Terminate parameter server''' - self.server.terminate() - self.server.join() def start_service(self): ''' Define service and run flask app''' @@ -137,7 +132,6 @@ def update_parameters(): self.app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False) ->>>>>>> master def predict(self, data): '''Get prediction probabilities for a numpy array of features @@ -198,109 +192,6 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, self.stop_server() -<<<<<<< HEAD -======= -class SparkWorker(object): - ''' - Synchronous Spark worker. This code will be executed on workers. 
- ''' - def __init__(self, yaml, parameters, train_config, master_optimizer, master_loss, master_metrics, custom_objects): - self.yaml = yaml - self.parameters = parameters - self.train_config = train_config - self.master_optimizer = master_optimizer - self.master_loss = master_loss - self.master_metrics = master_metrics - self.custom_objects = custom_objects - - def train(self, data_iterator): - ''' - Train a keras model on a worker - ''' - feature_iterator, label_iterator = tee(data_iterator, 2) - x_train = np.asarray([x for x, y in feature_iterator]) - y_train = np.asarray([y for x, y in label_iterator]) - - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) - model.set_weights(self.parameters.value) - weights_before_training = model.get_weights() - if x_train.shape[0] > self.train_config.get('batch_size'): - model.fit(x_train, y_train, **self.train_config) - weights_after_training = model.get_weights() - deltas = subtract_params(weights_before_training, weights_after_training) - yield deltas - - -class AsynchronousSparkWorker(object): - ''' - Asynchronous Spark worker. This code will be executed on workers. - ''' - def __init__(self, yaml, train_config, frequency, master_url, master_optimizer, master_loss, master_metrics, custom_objects): - self.yaml = yaml - self.train_config = train_config - self.frequency = frequency - self.master_url = master_url - self.master_optimizer = master_optimizer - self.master_loss = master_loss - self.master_metrics = master_metrics - self.custom_objects = custom_objects - - - def train(self, data_iterator): - ''' - Train a keras model on a worker and send asynchronous updates - to parameter server - ''' - feature_iterator, label_iterator = tee(data_iterator, 2) - x_train = np.asarray([x for x, y in feature_iterator]) - y_train = np.asarray([y for x, y in label_iterator]) - - if x_train.size == 0: - return - - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) - - nb_epoch = self.train_config['nb_epoch'] - batch_size = self.train_config.get('batch_size') - nb_train_sample = x_train.shape[0] - nb_batch = int(np.ceil(nb_train_sample/float(batch_size))) - index_array = np.arange(nb_train_sample) - batches = [(i*batch_size, min(nb_train_sample, (i+1)*batch_size)) for i in range(0, nb_batch)] - - if self.frequency == 'epoch': - for epoch in range(nb_epoch): - weights_before_training = get_server_weights(self.master_url) - model.set_weights(weights_before_training) - self.train_config['epochs'] = 1 - self.train_config['nb_epoch'] = 1 - if x_train.shape[0] > batch_size: - model.fit(x_train, y_train, **self.train_config) - self.train_config['nb_epoch'] = nb_epoch - weights_after_training = model.get_weights() - deltas = subtract_params(weights_before_training, weights_after_training) - put_deltas_to_server(deltas, self.master_url) - elif self.frequency == 'batch': - from keras.engine.training import slice_X - for epoch in range(nb_epoch): - if x_train.shape[0] > batch_size: - for (batch_start, batch_end) in batches: - weights_before_training = get_server_weights(self.master_url) - model.set_weights(weights_before_training) - batch_ids = index_array[batch_start:batch_end] - X = slice_X(x_train, batch_ids) - y = slice_X(y_train, batch_ids) - model.train_on_batch(X, y) - weights_after_training = model.get_weights() - deltas = 
subtract_params(weights_before_training, weights_after_training) - put_deltas_to_server(deltas, self.master_url) - else: - print('Choose frequency to be either batch or epoch') - yield [] - - ->>>>>>> master class SparkMLlibModel(SparkModel): ''' MLlib model takes RDDs of LabeledPoints. Internally we just convert @@ -328,9 +219,9 @@ def predict(self, mllib_data): ''' Predict probabilities for an RDD of features ''' - if isinstance(mllib_data, Matrix): + if isinstance(mllib_data, pyspark.mllib.linalg.Matrix): return to_matrix(self.master_network.predict(from_matrix(mllib_data))) - elif isinstance(mllib_data, Vector): + elif isinstance(mllib_data, pyspark.mllib.linalg.Vector): return to_vector(self.master_network.predict(from_vector(mllib_data))) else: print('Provide either an MLLib matrix or vector') From d2acd79dfc4b9b720166f68286aa3a6229bbd24e Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 12:18:55 +0200 Subject: [PATCH 09/57] doc strings, some refactoring --- elephas/spark_model.py | 77 ++++++++++-------------------------------- 1 file changed, 17 insertions(+), 60 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 0d56bec..a570cf3 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -13,10 +13,9 @@ class SparkModel(object): - ''' - SparkModel is the main abstraction of elephas. Every other model + """SparkModel is the main abstraction of elephas. Every other model should inherit from it. - ''' + """ # TODO: Eliminate Spark context (only used for first broadcast, can be extracted) def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', frequency='epoch', @@ -58,9 +57,8 @@ def __init__(self, sc, master_network, optimizer=None, def get_train_config(self, nb_epoch, batch_size, verbose, validation_split): - ''' - Get configuration of training parameters - ''' + """Get configuration of training parameters + """ train_config = {} train_config['nb_epoch'] = nb_epoch train_config['batch_size'] = batch_size @@ -69,13 +67,14 @@ def get_train_config(self, nb_epoch, batch_size, return train_config def get_config(self): - ''' - Get configuration of model parameters - ''' + """Get configuration of model parameters + """ model_config = {} model_config['model'] = self.master_network.get_config() model_config['optimizer'] = self.optimizer.get_config() model_config['mode'] = self.mode + model_config['frequency'] = self.frequency + model_config['num_workers'] = self.num_workers return model_config @property @@ -92,47 +91,6 @@ def start_server(self): def stop_server(self): self.parameter_server.stop() - def start_service(self): - ''' Define service and run flask app''' - app = Flask(__name__) - self.app = app - - @app.route('/') - def home(): - return 'Elephas' - - @app.route('/parameters', methods=['GET']) - def get_parameters(): - if self.mode == 'asynchronous': - self.lock.acquire_read() - self.pickled_weights = pickle.dumps(self.weights, -1) - pickled_weights = self.pickled_weights - if self.mode == 'asynchronous': - self.lock.release() - return pickled_weights - - @app.route('/update', methods=['POST']) - def update_parameters(): - delta = pickle.loads(request.data) - if self.mode == 'asynchronous': - self.lock.acquire_write() - - if not self.master_network.built: - self.master_network.build() - - base_constraint = lambda a: a - constraints = [base_constraint for x in self.weights] - - self.weights = self.optimizer.get_updates(self.weights, constraints, delta) - - if self.mode == 'asynchronous': - self.lock.release() - 
- return 'Update done' - - self.app.run(host='0.0.0.0', debug=True, - threaded=True, use_reloader=False) - def predict(self, data): '''Get prediction probabilities for a numpy array of features ''' @@ -187,16 +145,17 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, for delta in deltas: constraints = self.master_network.constraints new_parameters = self.optimizer.get_updates(self.weights, constraints, delta) + else: + raise ValueError("Unsupported mode {}".format(self.mode)) self.master_network.set_weights(new_parameters) if self.mode in ['asynchronous', 'hogwild']: self.stop_server() class SparkMLlibModel(SparkModel): - ''' - MLlib model takes RDDs of LabeledPoints. Internally we just convert + """MLlib model takes RDDs of LabeledPoints. Internally we just convert back to plain old pair RDDs and continue as in SparkModel - ''' + """ def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', frequency='epoch', num_workers=4, master_optimizer="adam", master_loss="categorical_crossentropy", @@ -208,20 +167,18 @@ def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', freq def train(self, labeled_points, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1, categorical=False, nb_classes=None): - ''' - Train an elephas model on an RDD of LabeledPoints - ''' + """Train an elephas model on an RDD of LabeledPoints + """ rdd = lp_to_simple_rdd(labeled_points, categorical, nb_classes) rdd = rdd.repartition(self.num_workers) self._train(rdd, nb_epoch, batch_size, verbose, validation_split) def predict(self, mllib_data): - ''' - Predict probabilities for an RDD of features - ''' + """Predict probabilities for an RDD of features + """ if isinstance(mllib_data, pyspark.mllib.linalg.Matrix): return to_matrix(self.master_network.predict(from_matrix(mllib_data))) elif isinstance(mllib_data, pyspark.mllib.linalg.Vector): return to_vector(self.master_network.predict(from_vector(mllib_data))) else: - print('Provide either an MLLib matrix or vector') + raise ValueError('Provide either an MLLib matrix or vector, got {}'.format(mllib_data.__name__)) From 74fe0451cbfdbeda3ee18f75c2a6d704483efd65 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 12:23:06 +0200 Subject: [PATCH 10/57] clients --- elephas/parameter/{connector.py => client.py} | 8 +++++--- elephas/spark_model.py | 6 +++--- elephas/utils/sockets.py | 5 ++++- elephas/worker.py | 4 ++-- 4 files changed, 14 insertions(+), 9 deletions(-) rename elephas/parameter/{connector.py => client.py} (92%) diff --git a/elephas/parameter/connector.py b/elephas/parameter/client.py similarity index 92% rename from elephas/parameter/connector.py rename to elephas/parameter/client.py index 1400cd0..4fc0487 100644 --- a/elephas/parameter/connector.py +++ b/elephas/parameter/client.py @@ -12,7 +12,7 @@ from ..utils.sockets import determine_master, send, receive -class BaseParameterServerConnector(object): +class BaseParameterClient(object): def __init__(self): raise NotImplementedError @@ -23,7 +23,8 @@ def get_parameters(self): raise NotImplementedError -class HttpConnector(BaseParameterServerConnector): +class HttpClient(BaseParameterClient): + def __init__(self): self.master_url = determine_master() self.headers = {'Content-Type': 'application/elephas'} @@ -44,7 +45,8 @@ def update_parameters(self, delta): return urllib2.urlopen(request).read() -class SocketConnector(BaseParameterServerConnector): +class SocketClient(BaseParameterClient): + def __init__(self, host='0.0.0.0', port=4000): 
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.socket.connect((host, port)) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index a570cf3..e2b64d7 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -9,7 +9,7 @@ from .optimizers import SGD as default_optimizer from .worker import AsynchronousSparkWorker, SparkWorker from .parameter.server import HttpServer, SocketServer -from .parameter.connector import HttpConnector, SocketConnector +from .parameter.client import HttpClient, SocketClient class SparkModel(object): @@ -50,10 +50,10 @@ def __init__(self, sc, master_network, optimizer=None, # TODO: connector has to be initialized on workers if parameter_server == 'http': self.parameter_server = HttpServer(self.master_network, self.optimizer, self.mode) - self.connector = HttpConnector() + self.connector = HttpClient() else: self.parameter_server = SocketServer(model_to_dict(self.master_network)) - self.connector = SocketConnector() + self.connector = SocketClient() def get_train_config(self, nb_epoch, batch_size, verbose, validation_split): diff --git a/elephas/utils/sockets.py b/elephas/utils/sockets.py index 1f85c37..ce4423d 100644 --- a/elephas/utils/sockets.py +++ b/elephas/utils/sockets.py @@ -8,6 +8,7 @@ def determine_master(port=':5000'): def receive_all(socket, num_bytes): """Reads `num_bytes` bytes from the specified socket. + # Arguments socket: Open socket. num_bytes: Number of bytes to read. @@ -25,7 +26,8 @@ def receive_all(socket, num_bytes): def receive(socket, num_bytes=20): - """Fetch data frame from open socket + """Fetch data frame from open socket. + # Arguments socket: Open socket. num_bytes: Number of bytes to read. @@ -37,6 +39,7 @@ def receive(socket, num_bytes=20): def send(socket, data, num_bytes=20): """Send data to specified socket. + # Arguments socket: socket. Opened socket. data: any. Data to send. 
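# ---------------------------------------------------------------------------
# Editorial sketch, not part of the patch above or below: send() and receive()
# frame every message as a fixed-width, zero-padded length header followed by
# a pickled payload, and receive_all() loops until that many bytes arrived.
# Below is a minimal, self-contained illustration of that framing over a local
# socketpair; the 20-byte header mirrors the num_bytes default above, all
# other names and values are made up for illustration.
import pickle
import socket

HEADER = 20


def frame(obj):
    # Pickle the object and prepend its length, zero-padded to HEADER bytes.
    body = pickle.dumps(obj, -1)
    return str(len(body)).zfill(HEADER).encode() + body


left, right = socket.socketpair()
left.sendall(frame({'delta': [1, 2, 3]}))

length = int(right.recv(HEADER))   # read the fixed-size length header
body = right.recv(length)          # then exactly that many payload bytes
                                   # (the real receive_all loops on recv)
print(pickle.loads(body))          # -> {'delta': [1, 2, 3]}
# ---------------------------------------------------------------------------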
diff --git a/elephas/worker.py b/elephas/worker.py index 9197825..7faf3c3 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -3,7 +3,7 @@ from .utils.functional_utils import subtract_params from keras.models import model_from_yaml -from .parameter.connector import SocketConnector +from .parameter.client import SocketClient class SparkWorker(object): @@ -79,7 +79,7 @@ def train(self, data_iterator): (i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch) ] - self.connector = SocketConnector() + self.connector = SocketClient() if self.frequency == 'epoch': for epoch in range(nb_epoch): From 2030ea5e96ca3440a7b043ff3c43bcc7aa278d8a Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 12:57:42 +0200 Subject: [PATCH 11/57] more cleaning --- elephas/ml/__init__.py | 2 ++ elephas/ml/adapter.py | 24 +++++++-------- elephas/ml/params.py | 55 ++++++++++++++-------------------- elephas/ml_model.py | 2 +- elephas/mllib/__init__.py | 1 + elephas/mllib/adapter.py | 17 +++++++---- elephas/parameter/__init__.py | 2 ++ elephas/parameter/server.py | 39 ++++++++++++++---------- elephas/utils/__init__.py | 3 ++ elephas/utils/serialization.py | 11 +++++++ elephas/utils/sockets.py | 40 ++++++++++++++++--------- 11 files changed, 114 insertions(+), 82 deletions(-) diff --git a/elephas/ml/__init__.py b/elephas/ml/__init__.py index e69de29..a6390b6 100644 --- a/elephas/ml/__init__.py +++ b/elephas/ml/__init__.py @@ -0,0 +1,2 @@ +from .adapter import * +from .params import * \ No newline at end of file diff --git a/elephas/ml/adapter.py b/elephas/ml/adapter.py index 31d5e25..ecc9840 100644 --- a/elephas/ml/adapter.py +++ b/elephas/ml/adapter.py @@ -6,9 +6,8 @@ def to_data_frame(sc, features, labels, categorical=False): - ''' - Convert numpy arrays of features and labels into Spark DataFrame - ''' + """Convert numpy arrays of features and labels into Spark DataFrame + """ lp_rdd = to_labeled_point(sc, features, labels, categorical) sql_context = SQLContext(sc) df = sql_context.createDataFrame(lp_rdd) @@ -16,21 +15,20 @@ def to_data_frame(sc, features, labels, categorical=False): def from_data_frame(df, categorical=False, nb_classes=None): - ''' - Convert DataFrame back to pair of numpy arrays - ''' + """Convert DataFrame back to pair of numpy arrays + """ lp_rdd = df.rdd.map(lambda row: LabeledPoint(row.label, row.features)) features, labels = from_labeled_point(lp_rdd, categorical, nb_classes) return features, labels -def df_to_simple_rdd(df, categorical=False, nb_classes=None, featuresCol='features', labelCol='label'): - ''' - Convert DataFrame into RDD of pairs - ''' - sqlContext = df.sql_ctx - sqlContext.registerDataFrameAsTable(df, "temp_table") - selected_df = sqlContext.sql("SELECT {0} AS features, {1} as label from temp_table".format(featuresCol, labelCol)) +def df_to_simple_rdd(df, categorical=False, nb_classes=None, features_col='features', label_col='label'): + """Convert DataFrame into RDD of pairs + """ + sql_context = df.sql_ctx + sql_context.registerDataFrameAsTable(df, "temp_table") + selected_df = sql_context.sql( + "SELECT {0} AS features, {1} as label from temp_table".format(features_col, label_col)) lp_rdd = selected_df.rdd.map(lambda row: LabeledPoint(row.label, row.features)) rdd = lp_to_simple_rdd(lp_rdd, categorical, nb_classes) return rdd diff --git a/elephas/ml/params.py b/elephas/ml/params.py index f7c2bca..b76c7ab 100644 --- a/elephas/ml/params.py +++ b/elephas/ml/params.py @@ -3,11 +3,10 @@ class HasKerasModelConfig(Params): - ''' - 
Mandatory field: + """Mandatory field: Parameter mixin for Keras model yaml - ''' + """ def __init__(self): super(HasKerasModelConfig, self).__init__() self.keras_model_config = Param(self, "keras_model_config", "Serialized Keras model as yaml string") @@ -21,9 +20,8 @@ def get_keras_model_config(self): class HasOptimizerConfig(Params): - ''' - Parameter mixin for Elephas optimizer config - ''' + """Parameter mixin for Elephas optimizer config + """ def __init__(self): super(HasOptimizerConfig, self).__init__() self.optimizer_config = Param(self, "optimizer_config", "Serialized Elephas optimizer properties") @@ -37,9 +35,8 @@ def get_optimizer_config(self): class HasMode(Params): - ''' - Parameter mixin for Elephas mode - ''' + """Parameter mixin for Elephas mode + """ def __init__(self): super(HasMode, self).__init__() self.mode = Param(self, "mode", "Elephas mode") @@ -54,9 +51,8 @@ def get_mode(self): class HasFrequency(Params): - ''' - Parameter mixin for Elephas frequency - ''' + """Parameter mixin for Elephas frequency + """ def __init__(self): super(HasFrequency, self).__init__() self.frequency = Param(self, "frequency", "Elephas frequency") @@ -71,11 +67,10 @@ def get_frequency(self): class HasNumberOfClasses(Params): - ''' - Mandatory: + """Mandatory: Parameter mixin for number of classes - ''' + """ def __init__(self): super(HasNumberOfClasses, self).__init__() self.nb_classes = Param(self, "nb_classes", "number of classes") @@ -90,11 +85,10 @@ def get_nb_classes(self): class HasCategoricalLabels(Params): - ''' - Mandatory: + """Mandatory: Parameter mixin for setting categorical features - ''' + """ def __init__(self): super(HasCategoricalLabels, self).__init__() self.categorical = Param(self, "categorical", "Boolean to indicate if labels are categorical") @@ -109,9 +103,8 @@ def get_categorical_labels(self): class HasEpochs(Params): - ''' - Parameter mixin for number of epochs - ''' + """Parameter mixin for number of epochs + """ def __init__(self): super(HasEpochs, self).__init__() self.nb_epoch = Param(self, "nb_epoch", "Number of epochs to train") @@ -126,9 +119,8 @@ def get_nb_epoch(self): class HasBatchSize(Params): - ''' - Parameter mixin for batch size - ''' + """Parameter mixin for batch size + """ def __init__(self): super(HasBatchSize, self).__init__() self.batch_size = Param(self, "batch_size", "Batch size") @@ -143,9 +135,8 @@ def get_batch_size(self): class HasVerbosity(Params): - ''' - Parameter mixin for output verbosity - ''' + """Parameter mixin for output verbosity + """ def __init__(self): super(HasVerbosity, self).__init__() self.verbose = Param(self, "verbose", "Stdout verbosity") @@ -160,9 +151,8 @@ def get_verbosity(self): class HasValidationSplit(Params): - ''' - Parameter mixin for validation split percentage - ''' + """Parameter mixin for validation split percentage + """ def __init__(self): super(HasValidationSplit, self).__init__() self.validation_split = Param(self, "validation_split", "validation split percentage") @@ -177,9 +167,8 @@ def get_validation_split(self): class HasNumberOfWorkers(Params): - ''' - Parameter mixin for number of workers - ''' + """Parameter mixin for number of workers + """ def __init__(self): super(HasNumberOfWorkers, self).__init__() self.num_workers = Param(self, "num_workers", "number of workers") diff --git a/elephas/ml_model.py b/elephas/ml_model.py index 0967ab1..41e8f75 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -52,7 +52,7 @@ def _fit(self, df): Private fit method of the Estimator, which trains 
the model. ''' simple_rdd = df_to_simple_rdd(df, categorical=self.get_categorical_labels(), nb_classes=self.get_nb_classes(), - featuresCol=self.getFeaturesCol(), labelCol=self.getLabelCol()) + features_col=self.getFeaturesCol(), label_col=self.getLabelCol()) simple_rdd = simple_rdd.repartition(self.get_num_workers()) optimizer = None if self.get_optimizer_config() is not None: diff --git a/elephas/mllib/__init__.py b/elephas/mllib/__init__.py index e69de29..56ac82f 100644 --- a/elephas/mllib/__init__.py +++ b/elephas/mllib/__init__.py @@ -0,0 +1 @@ +from .adapter import * \ No newline at end of file diff --git a/elephas/mllib/adapter.py b/elephas/mllib/adapter.py index 8d0dd7e..20ee22b 100644 --- a/elephas/mllib/adapter.py +++ b/elephas/mllib/adapter.py @@ -4,28 +4,33 @@ def from_matrix(matrix): - ''' Convert MLlib Matrix to numpy array ''' + """Convert MLlib Matrix to numpy array """ return matrix.toArray() def to_matrix(np_array): - ''' Convert numpy array to MLlib Matrix ''' + """Convert numpy array to MLlib Matrix + """ if len(np_array.shape) == 2: return Matrices.dense(np_array.shape[0], np_array.shape[1], np_array.ravel()) else: - raise Exception("""An MLLib Matrix can only be created from a two-dimensional numpy array""") + raise Exception("An MLLib Matrix can only be created from a two-dimensional " + + "numpy array, got {}".format(len(np_array.shape))) def from_vector(vector): - ''' Convert MLlib Vector to numpy array ''' + """Convert MLlib Vector to numpy array + """ return vector.array def to_vector(np_array): - ''' Convert numpy array to MLlib Vector ''' + """Convert numpy array to MLlib Vector + """ if len(np_array.shape) == 1: return Vectors.dense(np_array) else: - raise Exception("""An MLLib Vector can only be created from a one-dimensional numpy array""") + raise Exception("An MLLib Vector can only be created from a one-dimensional " + + "numpy array, got {}".format(len(np_array.shape))) diff --git a/elephas/parameter/__init__.py b/elephas/parameter/__init__.py index e69de29..a62f33e 100644 --- a/elephas/parameter/__init__.py +++ b/elephas/parameter/__init__.py @@ -0,0 +1,2 @@ +from .server import * +from .client import * \ No newline at end of file diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index 48ef843..f52217b 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -7,7 +7,7 @@ from ..utils.sockets import determine_master from ..utils.sockets import receive, send from ..utils.serialization import dict_to_model -from ..utils.rwlock import RWLock +from ..utils.rwlock import RWLock as Lock class BaseParameterServer(object): @@ -15,9 +15,13 @@ def __init__(self): raise NotImplementedError def start(self): + """Start the parameter server instance. + """ raise NotImplementedError def stop(self): + """Terminate the parameter server instance. + """ raise NotImplementedError @@ -29,23 +33,28 @@ def __init__(self, master_network, optimizer, mode): self.master_url = None self.optimizer = optimizer - self.lock = RWLock() + self.lock = Lock() self.pickled_weights = None self.weights = master_network.get_weights() def start(self): - '''Start parameter server''' self.server = Process(target=self.start_flask_service) self.server.start() self.master_url = determine_master() def stop(self): - '''Terminate parameter server''' self.server.terminate() self.server.join() def start_flask_service(self): - '''Define service and run flask app''' + """Define Flask parameter server service. 
+ + This HTTP server can do two things: get the current model + parameters and update model parameters. After registering + the `parameters` and `update` routes, the service will + get started. + + """ app = Flask(__name__) self.app = app @@ -104,12 +113,12 @@ def stop(self): self.thread = None def start_server(self): - self.runs = True sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) sock.bind(('0.0.0.0', self.port)) sock.listen(5) self.socket = sock + self.runs = True self.run() def stop_server(self): @@ -128,27 +137,27 @@ def stop_server(self): self.socket = None self.connections = [] - def update_parameters(self, socket): - data = receive(socket) + def update_parameters(self, conn): + data = receive(conn) delta = data['delta'] with self.lock: weights = self.model.get_weights() + delta self.model.set_weights(weights) - def get_parameters(self, socket): + def get_parameters(self, conn): with self.lock: weights = self.model.get_weights() - send(socket, weights) + send(conn, weights) - def action_listener(self, connection): + def action_listener(self, conn): while self.runs: - get_or_update = connection.recv(1).decode() + get_or_update = conn.recv(1).decode() if get_or_update == 'u': - self.set_parameters(connection) + self.set_parameters(conn) elif get_or_update == 'g': - self.get_parameters(connection) + self.get_parameters(conn) else: - print('Not a valid action') + raise ValueError('Received invalid action') def run(self): while self.runs: diff --git a/elephas/utils/__init__.py b/elephas/utils/__init__.py index e69de29..2ebdb46 100644 --- a/elephas/utils/__init__.py +++ b/elephas/utils/__init__.py @@ -0,0 +1,3 @@ +from .functional_utils import * +from .rdd_utils import * +from .serialization import * \ No newline at end of file diff --git a/elephas/utils/serialization.py b/elephas/utils/serialization.py index c83c233..db5bc04 100644 --- a/elephas/utils/serialization.py +++ b/elephas/utils/serialization.py @@ -2,10 +2,21 @@ def model_to_dict(model): + """Turns a Keras model into a Python dictionary + + :param model: Keras model instance + :return: dictionary with model information + """ return dict(model=model.to_json(), weights=model.get_weights()) def dict_to_model(dict): + """Turns a Python dictionary with model architecture and weights + back into a Keras model + + :param dict: dictionary with `model` and `weights` keys. + :return: Keras model instantiated from dictionary + """ model = model_from_json(dict['model']) model.set_weights(dict['weights']) return model diff --git a/elephas/utils/sockets.py b/elephas/utils/sockets.py index ce4423d..5f2c544 100644 --- a/elephas/utils/sockets.py +++ b/elephas/utils/sockets.py @@ -2,17 +2,25 @@ from socket import gethostbyname, gethostname -def determine_master(port=':5000'): +def determine_master(port=':4000'): + """Determine address of master so that workers + can connect to it. + + :param port: port on which the application runs + :return: Master address + """ return gethostbyname(gethostname()) + port -def receive_all(socket, num_bytes): +def _receive_all(socket, num_bytes): """Reads `num_bytes` bytes from the specified socket. - # Arguments - socket: Open socket. - num_bytes: Number of bytes to read. 
+ :param socket: open socket instance + :param num_bytes: number of bytes to read + + :return: received data """ + buffer = '' buffer_size = 0 bytes_left = num_bytes @@ -26,23 +34,27 @@ def receive_all(socket, num_bytes): def receive(socket, num_bytes=20): - """Fetch data frame from open socket. + """Receive data frame from open socket. - # Arguments - socket: Open socket. - num_bytes: Number of bytes to read. + :param socket: open socket instance + :param num_bytes: number of bytes to read + + :return: received data """ - length = int(receive_all(socket, num_bytes).decode()) - serialized_data = receive_all(socket, length) + length = int(_receive_all(socket, num_bytes).decode()) + serialized_data = _receive_all(socket, length) return pickle.loads(serialized_data) def send(socket, data, num_bytes=20): """Send data to specified socket. - # Arguments - socket: socket. Opened socket. - data: any. Data to send. + + :param socket: open socket instance + :param data: data to send + :param num_bytes: number of bytes to read + + :return: received data """ pickled_data = pickle.dumps(data, -1) length = str(len(pickled_data)).zfill(num_bytes) From 1c6de1629ab8d33944203409c42e279dcd7bd570 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 13:07:01 +0200 Subject: [PATCH 12/57] more cleaning --- elephas/spark_model.py | 10 +++++----- elephas/utils/__init__.py | 4 +++- elephas/utils/functional_utils.py | 20 ++++++++------------ elephas/utils/rdd_utils.py | 22 +++++++++------------- elephas/worker.py | 27 ++++++++++++--------------- 5 files changed, 37 insertions(+), 46 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index e2b64d7..563c246 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -3,13 +3,13 @@ import pyspark -from .utils.rdd_utils import lp_to_simple_rdd -from .utils.serialization import model_to_dict -from .mllib.adapter import to_matrix, from_matrix, to_vector, from_vector +from .utils import lp_to_simple_rdd +from .utils import model_to_dict +from .mllib import to_matrix, from_matrix, to_vector, from_vector from .optimizers import SGD as default_optimizer from .worker import AsynchronousSparkWorker, SparkWorker -from .parameter.server import HttpServer, SocketServer -from .parameter.client import HttpClient, SocketClient +from .parameter import HttpServer, SocketServer +from .parameter import HttpClient, SocketClient class SparkModel(object): diff --git a/elephas/utils/__init__.py b/elephas/utils/__init__.py index 2ebdb46..0c657db 100644 --- a/elephas/utils/__init__.py +++ b/elephas/utils/__init__.py @@ -1,3 +1,5 @@ from .functional_utils import * from .rdd_utils import * -from .serialization import * \ No newline at end of file +from .serialization import * +from .sockets import * +from .rwlock import * \ No newline at end of file diff --git a/elephas/utils/functional_utils.py b/elephas/utils/functional_utils.py index c58e842..8f9e313 100644 --- a/elephas/utils/functional_utils.py +++ b/elephas/utils/functional_utils.py @@ -5,9 +5,8 @@ def add_params(p1, p2): - ''' - Add two lists of parameters - ''' + """Add two lists of parameters + """ res = [] for x, y in zip(p1, p2): res.append(x + y) @@ -15,9 +14,8 @@ def add_params(p1, p2): def subtract_params(p1, p2): - ''' - Subtract two lists of parameters - ''' + """Subtract two lists of parameters + """ res = [] for x, y in zip(p1, p2): res.append(x - y) @@ -25,10 +23,9 @@ def subtract_params(p1, p2): def get_neutral(array): - ''' - Get list of zero-valued numpy arrays for + """Get list 
of zero-valued numpy arrays for specified list of numpy arrays - ''' + """ res = [] for x in array: res.append(np.zeros_like(x)) @@ -36,9 +33,8 @@ def get_neutral(array): def divide_by(array_list, num_workers): - ''' - Divide a list of parameters by an integer num_workers. - ''' + """Divide a list of parameters by an integer num_workers. + """ for i, x in enumerate(array_list): array_list[i] /= num_workers return array_list diff --git a/elephas/utils/rdd_utils.py b/elephas/utils/rdd_utils.py index 7e83b1d..aaa8580 100644 --- a/elephas/utils/rdd_utils.py +++ b/elephas/utils/rdd_utils.py @@ -8,19 +8,17 @@ def to_simple_rdd(sc, features, labels): - ''' - Convert numpy arrays of features and labels into + """Convert numpy arrays of features and labels into an RDD of pairs. - ''' + """ pairs = [(x, y) for x, y in zip(features, labels)] return sc.parallelize(pairs) def to_labeled_point(sc, features, labels, categorical=False): - ''' - Convert numpy arrays of features and labels into + """Convert numpy arrays of features and labels into a LabeledPoint RDD - ''' + """ labeled_points = [] for x, y in zip(features, labels): if categorical: @@ -32,9 +30,8 @@ def to_labeled_point(sc, features, labels, categorical=False): def from_labeled_point(rdd, categorical=False, nb_classes=None): - ''' - Convert a LabeledPoint RDD back to a pair of numpy arrays - ''' + """Convert a LabeledPoint RDD back to a pair of numpy arrays + """ features = np.asarray(rdd.map(lambda lp: from_vector(lp.features)).collect()) labels = np.asarray(rdd.map(lambda lp: lp.label).collect(), dtype='int32') if categorical: @@ -48,16 +45,15 @@ def from_labeled_point(rdd, categorical=False, nb_classes=None): def encode_label(label, nb_classes): - ''' one-hot encoding of a label ''' + """One-hot encoding of a label """ encoded = np.zeros(nb_classes) encoded[int(label)] = 1. return encoded def lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=None): - ''' - Convert a LabeledPoint RDD into an RDD of feature-label pairs - ''' + """Convert a LabeledPoint RDD into an RDD of feature-label pairs + """ if categorical: if not nb_classes: labels = np.asarray(lp_rdd.map(lambda lp: lp.label).collect(), dtype='int32') diff --git a/elephas/worker.py b/elephas/worker.py index 7faf3c3..7c0ab34 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -1,14 +1,14 @@ import numpy as np from itertools import tee - -from .utils.functional_utils import subtract_params from keras.models import model_from_yaml -from .parameter.client import SocketClient + +from .utils import subtract_params +from .parameter import SocketClient class SparkWorker(object): - '''Synchronous Spark worker. This code will be executed on workers. - ''' + """Synchronous Spark worker. This code will be executed on workers. + """ def __init__(self, yaml, parameters, train_config, master_optimizer, master_loss, master_metrics, custom_objects): self.yaml = yaml @@ -20,9 +20,8 @@ def __init__(self, yaml, parameters, train_config, master_optimizer, self.custom_objects = custom_objects def train(self, data_iterator): - ''' - Train a keras model on a worker - ''' + """Train a keras model on a worker + """ feature_iterator, label_iterator = tee(data_iterator, 2) x_train = np.asarray([x for x, y in feature_iterator]) y_train = np.asarray([y for x, y in label_iterator]) @@ -41,9 +40,8 @@ def train(self, data_iterator): class AsynchronousSparkWorker(object): - ''' - Asynchronous Spark worker. This code will be executed on workers. - ''' + """Asynchronous Spark worker. 
This code will be executed on workers. + """ def __init__(self, yaml, ps_connector, train_config, frequency, master_optimizer, master_loss, master_metrics, custom_objects): @@ -56,10 +54,9 @@ def __init__(self, yaml, ps_connector, train_config, frequency, self.custom_objects = custom_objects def train(self, data_iterator): - ''' - Train a keras model on a worker and send asynchronous updates + """Train a keras model on a worker and send asynchronous updates to parameter server - ''' + """ feature_iterator, label_iterator = tee(data_iterator, 2) x_train = np.asarray([x for x, y in feature_iterator]) y_train = np.asarray([y for x, y in label_iterator]) @@ -106,5 +103,5 @@ def train(self, data_iterator): deltas = subtract_params(weights_before_training, weights_after_training) self.connector.update_parameters(deltas) else: - print('Choose frequency to be either batch or epoch') + raise ValueError('frequency parameter can be `epoch` or `batch, got {}'.format(self.frequency)) yield [] From 434b4a6f3df70c2b99b8440a0c8f814d67fdff4b Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 14:38:16 +0200 Subject: [PATCH 13/57] test --- .gitignore | 4 +++- elephas/hyperparam.py | 16 +++++++++++----- examples/hyperparam_optimization.py | 16 +++++++--------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index b25f561..abeb199 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,6 @@ examples/metastore_db examples/*.csv -.idea/ \ No newline at end of file +.idea/ + +.pytest_cache \ No newline at end of file diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index 1b9aff0..a65a87a 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -11,9 +11,11 @@ class HyperParamModel(object): - ''' - HyperParamModel - ''' + """HyperParamModel + + Computes distributed hyper-parameter optimization using Hyperas and + Spark. + """ def __init__(self, sc, num_workers=4): self.spark_context = sc self.num_workers = num_workers @@ -26,7 +28,7 @@ def compute_trials(self, model, data, max_evals): hyperas_worker = HyperasWorker(bc_model, bc_max_evals) dummy_rdd = self.spark_context.parallelize([i for i in range(1, 1000)]) dummy_rdd = dummy_rdd.repartition(self.num_workers) - trials_list = dummy_rdd.mapPartitions(hyperas_worker.minimize).collect() + trials_list = dummy_rdd.mapPartitions(hyperas_worker._minimize).collect() return trials_list @@ -72,11 +74,15 @@ def best_models(self, nb_models, model, data, max_evals): class HyperasWorker(object): + """ HyperasWorker + + Executes hyper-parameter search on each worker and returns results. 
+ """ def __init__(self, bc_model, bc_max_evals): self.model_string = bc_model.value self.max_evals = bc_max_evals.value - def minimize(self, dummy_iterator): + def _minimize(self, dummy_iterator): trials = Trials() algo = rand.suggest diff --git a/examples/hyperparam_optimization.py b/examples/hyperparam_optimization.py index a00eb69..ea75881 100644 --- a/examples/hyperparam_optimization.py +++ b/examples/hyperparam_optimization.py @@ -1,22 +1,21 @@ from __future__ import print_function from __future__ import absolute_import -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim +from pyspark import SparkContext, SparkConf + +from hyperopt import STATUS_OK from hyperas.distributions import choice, uniform from elephas.hyperparam import HyperParamModel -from pyspark import SparkContext, SparkConf def data(): - ''' - Data providing function: + """Data providing function: Make sure to have every relevant import statement included here and return data as used in model function below. This function is separated from model() so that hyperopt won't reload data for each evaluation run. - ''' + """ from keras.datasets import mnist from keras.utils import np_utils (X_train, y_train), (X_test, y_test) = mnist.load_data() @@ -33,8 +32,7 @@ def data(): def model(X_train, Y_train, X_test, Y_test): - ''' - Model providing function: + """Model providing function: Create Keras model with double curly brackets dropped-in as needed. Return value has to be a valid python dictionary with two customary keys: @@ -42,7 +40,7 @@ def model(X_train, Y_train, X_test, Y_test): - status: Just use STATUS_OK and see hyperopt documentation if not feasible The last one is optional, though recommended, namely: - model: specify the model just created so that we can later use it again. 
- ''' + """ from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation from keras.optimizers import RMSprop From 955b0fde4b1046e0ae0b34d4aec56707f165aa05 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 14:46:20 +0200 Subject: [PATCH 14/57] test env and proper hyperparam test --- .gitignore | 4 ++- tests/test_hyperparam.py | 56 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index abeb199..55540bc 100644 --- a/.gitignore +++ b/.gitignore @@ -63,4 +63,6 @@ examples/*.csv .idea/ -.pytest_cache \ No newline at end of file +.pytest_cache + +test_env \ No newline at end of file diff --git a/tests/test_hyperparam.py b/tests/test_hyperparam.py index f90c8d9..d677715 100644 --- a/tests/test_hyperparam.py +++ b/tests/test_hyperparam.py @@ -1,8 +1,62 @@ import numpy as np import pytest +from hyperopt import STATUS_OK +from hyperas.distributions import choice, uniform -pytestmark = pytest.mark.usefixtures("spark_context") +from elephas.hyperparam import HyperParamModel + + +pytest.mark.usefixtures("spark_context") def test_that_requires_sc(spark_context): assert spark_context.parallelize(np.zeros((10, 10))).count() == 10 + + +def test_hyper_param_model(spark_context): + def data(): + from keras.datasets import mnist + from keras.utils import np_utils + (X_train, y_train), (X_test, y_test) = mnist.load_data() + X_train = X_train.reshape(60000, 784) + X_test = X_test.reshape(10000, 784) + X_train = X_train.astype('float32') + X_test = X_test.astype('float32') + X_train /= 255 + X_test /= 255 + nb_classes = 10 + Y_train = np_utils.to_categorical(y_train, nb_classes) + Y_test = np_utils.to_categorical(y_test, nb_classes) + return X_train, Y_train, X_test, Y_test + + def model(X_train, Y_train, X_test, Y_test): + from keras.models import Sequential + from keras.layers.core import Dense, Dropout, Activation + from keras.optimizers import RMSprop + + model = Sequential() + model.add(Dense(512, input_shape=(784,))) + model.add(Activation('relu')) + model.add(Dropout({{uniform(0, 1)}})) + model.add(Dense({{choice([256, 512, 1024])}})) + model.add(Activation('relu')) + model.add(Dropout({{uniform(0, 1)}})) + model.add(Dense(10)) + model.add(Activation('softmax')) + + rms = RMSprop() + model.compile(loss='categorical_crossentropy', optimizer=rms) + + model.fit(X_train, Y_train, + batch_size={{choice([64, 128])}}, + nb_epoch=1, + show_accuracy=True, + verbose=2, + validation_data=(X_test, Y_test)) + score, acc = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) + print('Test accuracy:', acc) + return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), + 'weights': pickle.dumps(model.get_weights())} + + hyperparam_model = HyperParamModel(spark_context) + hyperparam_model.minimize(model=model, data=data, max_evals=5) From 7335b6a659058d3d0845393d199effa43ff4d861 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 15:03:00 +0200 Subject: [PATCH 15/57] fix hyperas signature --- elephas/hyperparam.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index a65a87a..04b1ced 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -92,5 +92,7 @@ def _minimize(self, dummy_iterator): rand_seed = np.random.randint(elem) base_minimizer(model=None, data=None, algo=algo, max_evals=self.max_evals, - trials=trials, full_model_string=self.model_string, rseed=rand_seed) + trials=trials, 
full_model_string=self.model_string, rseed=rand_seed, + full_model_string=None, notebook_name=None, + verbose=True, stack=3) yield trials From 98141589319016b342e94c54ef7bc29d692f4819 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 15:42:34 +0200 Subject: [PATCH 16/57] more testing --- .gitignore | 3 ++- elephas/hyperparam.py | 20 +++++++++++--------- examples/hyperparam_optimization.py | 3 --- tests/test_hyperparam.py | 1 - 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 55540bc..001e228 100644 --- a/.gitignore +++ b/.gitignore @@ -65,4 +65,5 @@ examples/*.csv .pytest_cache -test_env \ No newline at end of file +test_env/ +venv/ \ No newline at end of file diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index 04b1ced..5a76e4e 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -21,11 +21,12 @@ def __init__(self, sc, num_workers=4): self.num_workers = num_workers def compute_trials(self, model, data, max_evals): - model_string = get_hyperopt_model_string(model, data) - bc_model = self.spark_context.broadcast(model_string) - bc_max_evals = self.spark_context.broadcast(max_evals) + model_string = get_hyperopt_model_string(model=model, data=data, functions=None, notebook_name=None, + verbose=False, stack=3) + #bc_model = self.spark_context.broadcast(model_string) + #bc_max_evals = self.spark_context.broadcast(max_evals) - hyperas_worker = HyperasWorker(bc_model, bc_max_evals) + hyperas_worker = HyperasWorker(model_string, max_evals) dummy_rdd = self.spark_context.parallelize([i for i in range(1, 1000)]) dummy_rdd = dummy_rdd.repartition(self.num_workers) trials_list = dummy_rdd.mapPartitions(hyperas_worker._minimize).collect() @@ -33,6 +34,8 @@ def compute_trials(self, model, data, max_evals): return trials_list def minimize(self, model, data, max_evals): + global best_model_yaml, best_model_weights + trials_list = self.compute_trials(model, data, max_evals) best_val = 1e7 @@ -79,8 +82,8 @@ class HyperasWorker(object): Executes hyper-parameter search on each worker and returns results. 
""" def __init__(self, bc_model, bc_max_evals): - self.model_string = bc_model.value - self.max_evals = bc_max_evals.value + self.model_string = bc_model + self.max_evals = bc_max_evals def _minimize(self, dummy_iterator): trials = Trials() @@ -91,8 +94,7 @@ def _minimize(self, dummy_iterator): random.seed(elem) rand_seed = np.random.randint(elem) - base_minimizer(model=None, data=None, algo=algo, max_evals=self.max_evals, - trials=trials, full_model_string=self.model_string, rseed=rand_seed, - full_model_string=None, notebook_name=None, + base_minimizer(model=None, data=None, functions=None, algo=algo, max_evals=self.max_evals, + trials=trials, rseed=rand_seed, full_model_string=self.model_string, notebook_name=None, verbose=True, stack=3) yield trials diff --git a/examples/hyperparam_optimization.py b/examples/hyperparam_optimization.py index ea75881..bc0a245 100644 --- a/examples/hyperparam_optimization.py +++ b/examples/hyperparam_optimization.py @@ -1,6 +1,3 @@ -from __future__ import print_function -from __future__ import absolute_import - from pyspark import SparkContext, SparkConf from hyperopt import STATUS_OK diff --git a/tests/test_hyperparam.py b/tests/test_hyperparam.py index d677715..f246195 100644 --- a/tests/test_hyperparam.py +++ b/tests/test_hyperparam.py @@ -5,7 +5,6 @@ from elephas.hyperparam import HyperParamModel - pytest.mark.usefixtures("spark_context") From 26e2d4eef180e3c1fe3fa02a5a15eb725b933bad Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 16:01:26 +0200 Subject: [PATCH 17/57] remove broadcasting --- elephas/spark_model.py | 27 ++++++++++++--------------- elephas/worker.py | 4 ++-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 563c246..b6fea0c 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -17,7 +17,7 @@ class SparkModel(object): should inherit from it. """ # TODO: Eliminate Spark context (only used for first broadcast, can be extracted) - def __init__(self, sc, master_network, optimizer=None, + def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', num_workers=4, master_optimizer="sgd", # TODO: other default @@ -27,7 +27,6 @@ def __init__(self, sc, master_network, optimizer=None, parameter_server='http', *args, **kwargs): - self.spark_context = sc self._master_network = master_network if custom_objects is None: custom_objects = {} @@ -47,7 +46,7 @@ def __init__(self, sc, master_network, optimizer=None, self.master_metrics = master_metrics self.custom_objects = custom_objects - # TODO: connector has to be initialized on workers + # TODO: clients have to be initialized on workers. Only init servers here, clients on workers if parameter_server == 'http': self.parameter_server = HttpServer(self.master_network, self.optimizer, self.mode) self.connector = HttpClient() @@ -92,20 +91,20 @@ def stop_server(self): self.parameter_server.stop() def predict(self, data): - '''Get prediction probabilities for a numpy array of features - ''' + """Get prediction probabilities for a numpy array of features + """ return self.master_network.predict(data) def predict_classes(self, data): - '''Predict classes for a numpy array of features - ''' + """ Predict classes for a numpy array of features + """ return self.master_network.predict_classes(data) def train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1): # TODO: Make dataframe the standard, but support RDDs as well - '''Train an elephas model. 
- ''' + """Train an elephas model. + """ rdd = rdd.repartition(self.num_workers) if self.mode in ['asynchronous', 'synchronous', 'hogwild']: @@ -115,9 +114,8 @@ def train(self, rdd, nb_epoch=10, batch_size=32, def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1): - ''' - Protected train method to make wrapping of modes easier - ''' + """Protected train method to make wrapping of modes easier + """ self.master_network.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) @@ -134,10 +132,9 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, rdd.mapPartitions(worker.train).collect() new_parameters = self.connector.get_parameters() elif self.mode == 'synchronous': - init = self.master_network.get_weights() - parameters = self.spark_context.broadcast(init) + parameters = self.master_network.get_weights() worker = SparkWorker( - yaml, parameters, train_config, + yaml, parameters, train_config, self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects ) deltas = rdd.mapPartitions(worker.train).collect() diff --git a/elephas/worker.py b/elephas/worker.py index 7c0ab34..83b2068 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -30,7 +30,7 @@ def train(self, data_iterator): model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) - model.set_weights(self.parameters.value) + model.set_weights(self.parameters) weights_before_training = model.get_weights() if x_train.shape[0] > self.train_config.get('batch_size'): model.fit(x_train, y_train, **self.train_config) @@ -42,7 +42,7 @@ def train(self, data_iterator): class AsynchronousSparkWorker(object): """Asynchronous Spark worker. This code will be executed on workers. 
""" - def __init__(self, yaml, ps_connector, train_config, frequency, + def __init__(self, yaml, client_mode, train_config, frequency, master_optimizer, master_loss, master_metrics, custom_objects): self.yaml = yaml From f9f76c58d8bc4de95a32a9ea5254023bc95f842b Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 17:09:41 +0200 Subject: [PATCH 18/57] functional test --- elephas/utils/functional_utils.py | 29 ++++++++++++++----- tests/utils/test_functional_utils.py | 42 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/elephas/utils/functional_utils.py b/elephas/utils/functional_utils.py index 8f9e313..83a11fb 100644 --- a/elephas/utils/functional_utils.py +++ b/elephas/utils/functional_utils.py @@ -4,36 +4,51 @@ from six.moves import zip -def add_params(p1, p2): - """Add two lists of parameters +def add_params(param_list_left, param_list_right): + """Add two lists of parameters one by one + + :param param_list_left: list of numpy arrays + :param param_list_right: list of numpy arrays + :return: list of numpy arrays """ res = [] - for x, y in zip(p1, p2): + for x, y in zip(param_list_left, param_list_right): res.append(x + y) return res -def subtract_params(p1, p2): +def subtract_params(param_list_left, param_list_right): """Subtract two lists of parameters + + :param param_list_left: list of numpy arrays + :param param_list_right: list of numpy arrays + :return: list of numpy arrays """ res = [] - for x, y in zip(p1, p2): + for x, y in zip(param_list_left, param_list_right): res.append(x - y) return res -def get_neutral(array): +def get_neutral(array_list): """Get list of zero-valued numpy arrays for specified list of numpy arrays + + :param array_list: list of numpy arrays + :return: list of zeros of same shape as input """ res = [] - for x in array: + for x in array_list: res.append(np.zeros_like(x)) return res def divide_by(array_list, num_workers): """Divide a list of parameters by an integer num_workers. 
+ + :param array_list: + :param num_workers: + :return: """ for i, x in enumerate(array_list): array_list[i] /= num_workers diff --git a/tests/utils/test_functional_utils.py b/tests/utils/test_functional_utils.py index e69de29..b885e11 100644 --- a/tests/utils/test_functional_utils.py +++ b/tests/utils/test_functional_utils.py @@ -0,0 +1,42 @@ +import pytest +import numpy as np +from elephas.utils import functional_utils + +pytest.mark.usefixtures("spark_context") + + +def test_add_params(): + p1 = [np.ones((5, 5)) for i in range(10)] + p2 = [np.ones((5, 5)) for i in range(10)] + + res = functional_utils.add_params(p1, p2) + assert len(res) == 10 + for i in range(5): + for j in range(5): + assert res[0][i, j] == 2 + + +def test_subtract_params(): + p1 = [np.ones((5, 5)) for i in range(10)] + p2 = [np.ones((5, 5)) for i in range(10)] + + res = functional_utils.subtract_params(p1, p2) + + assert len(res) == 10 + for i in range(5): + for j in range(5): + assert res[0][i, j] == 0 + + +def test_get_neutral(): + x = [np.ones((3, 4))] + res = functional_utils.get_neutral(x) + assert res[0].shape == x[0].shape + assert res[0][0, 0] == 0 + + +def test_divide_by(): + x = [np.ones((3, 4))] + res = functional_utils.divide_by(x, num_workers=10) + assert res[0].shape == x[0].shape + assert res[0][0, 0] == 0.1 \ No newline at end of file From d272b3a95b17b2db6b731de05cae2f47b7a21b77 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 17:18:20 +0200 Subject: [PATCH 19/57] add port to master call --- elephas/parameter/client.py | 4 ++-- elephas/parameter/server.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/elephas/parameter/client.py b/elephas/parameter/client.py index 4fc0487..624e93a 100644 --- a/elephas/parameter/client.py +++ b/elephas/parameter/client.py @@ -25,8 +25,8 @@ def get_parameters(self): class HttpClient(BaseParameterClient): - def __init__(self): - self.master_url = determine_master() + def __init__(self, port=4000): + self.master_url = determine_master(port=port) self.headers = {'Content-Type': 'application/elephas'} def get_parameters(self): diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index f52217b..4f42415 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -27,11 +27,12 @@ def stop(self): class HttpServer(BaseParameterServer): - def __init__(self, master_network, optimizer, mode): + def __init__(self, master_network, optimizer, mode, port=4000): self.master_network = master_network self.mode = mode self.master_url = None self.optimizer = optimizer + self.port = port self.lock = Lock() self.pickled_weights = None @@ -40,7 +41,7 @@ def __init__(self, master_network, optimizer, mode): def start(self): self.server = Process(target=self.start_flask_service) self.server.start() - self.master_url = determine_master() + self.master_url = determine_master(self.port) def stop(self): self.server.terminate() @@ -87,11 +88,12 @@ def empty(a): self.lock.release() return 'Update done' - self.app.run(host='0.0.0.0', debug=True, + self.app.run(host='0.0.0.0', debug=True, port=self.port, threaded=True, use_reloader=False) -class SocketServer(object): +class SocketServer(BaseParameterServer): + def __init__(self, model, port=4000): self.model = dict_to_model(model) self.port = port From d92dab71b750657a6b4fba3e9ddeb6b001ca6075 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 18:09:10 +0200 Subject: [PATCH 20/57] rdd util test --- elephas/parameter/client.py | 16 +++---- 
elephas/utils/rdd_utils.py | 30 +++++++++++- tests/utils/test_rdd_utils.py | 88 +++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 10 deletions(-) diff --git a/elephas/parameter/client.py b/elephas/parameter/client.py index 624e93a..2327555 100644 --- a/elephas/parameter/client.py +++ b/elephas/parameter/client.py @@ -30,17 +30,17 @@ def __init__(self, port=4000): self.headers = {'Content-Type': 'application/elephas'} def get_parameters(self): - '''Retrieve master weights from parameter server - ''' - request = urllib2.Request('http://{0}/parameters'.format(self.master_url), + """Retrieve master weights from parameter server + """ + request = urllib2.Request('http://{}/parameters'.format(self.master_url), headers=self.headers) pickled_weights = urllib2.urlopen(request).read() return pickle.loads(pickled_weights) def update_parameters(self, delta): - '''Update master parameters with deltas from training process - ''' - request = urllib2.Request('http://{0}/update'.format(self.master_url), + """Update master parameters with deltas from training process + """ + request = urllib2.Request('http://{}/update'.format(self.master_url), pickle.dumps(delta, -1), headers=self.headers) return urllib2.urlopen(request).read() @@ -61,6 +61,6 @@ def update_parameters(self, delta): # data['worker_id'] = self.get_worker_id() data['delta'] = delta self.socket.sendall(b'u') - print('>>> Start sending delta to socket') + # print('>>> Start sending delta to socket') send(self.socket, data) - print('>>> Done') + # print('>>> Done') diff --git a/elephas/utils/rdd_utils.py b/elephas/utils/rdd_utils.py index aaa8580..2420875 100644 --- a/elephas/utils/rdd_utils.py +++ b/elephas/utils/rdd_utils.py @@ -10,6 +10,11 @@ def to_simple_rdd(sc, features, labels): """Convert numpy arrays of features and labels into an RDD of pairs. + + :param sc: Spark context + :param features: numpy array with features + :param labels: numpy array with labels + :return: Spark RDD with feature-label pairs """ pairs = [(x, y) for x, y in zip(features, labels)] return sc.parallelize(pairs) @@ -17,7 +22,13 @@ def to_simple_rdd(sc, features, labels): def to_labeled_point(sc, features, labels, categorical=False): """Convert numpy arrays of features and labels into - a LabeledPoint RDD + a LabeledPoint RDD for MLlib and ML integration. 
+ + :param sc: Spark context + :param features: numpy array with features + :param labels: numpy array with labels + :param categorical: boolean, whether labels are already one-hot encoded or not + :return: LabeledPoint RDD with features and labels """ labeled_points = [] for x, y in zip(features, labels): @@ -31,6 +42,11 @@ def to_labeled_point(sc, features, labels, categorical=False): def from_labeled_point(rdd, categorical=False, nb_classes=None): """Convert a LabeledPoint RDD back to a pair of numpy arrays + + :param rdd: LabeledPoint RDD + :param categorical: boolean, if labels should be one-hot encode when returned + :param nb_classes: optional int, indicating the number of class labels + :return: pair of numpy arrays, features and labels """ features = np.asarray(rdd.map(lambda lp: from_vector(lp.features)).collect()) labels = np.asarray(rdd.map(lambda lp: lp.label).collect(), dtype='int32') @@ -45,7 +61,12 @@ def from_labeled_point(rdd, categorical=False, nb_classes=None): def encode_label(label, nb_classes): - """One-hot encoding of a label """ + """One-hot encoding of a single label + + :param label: class label (int or double without floating point digits) + :param nb_classes: int, number of total classes + :return: one-hot encoded vector + """ encoded = np.zeros(nb_classes) encoded[int(label)] = 1. return encoded @@ -53,6 +74,11 @@ def encode_label(label, nb_classes): def lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=None): """Convert a LabeledPoint RDD into an RDD of feature-label pairs + + :param lp_rdd: LabeledPoint RDD of features and labels + :param categorical: boolean, if labels should be one-hot encode when returned + :param nb_classes: int, number of total classes + :return: Spark RDD with feature-label pairs """ if categorical: if not nb_classes: diff --git a/tests/utils/test_rdd_utils.py b/tests/utils/test_rdd_utils.py index e69de29..283dd6d 100644 --- a/tests/utils/test_rdd_utils.py +++ b/tests/utils/test_rdd_utils.py @@ -0,0 +1,88 @@ +import pytest +import numpy as np +from elephas.utils import rdd_utils + +pytest.mark.usefixtures("spark_context") + + +def test_to_simple_rdd(spark_context): + features = np.ones((5, 10)) + labels = np.ones((5,)) + rdd = rdd_utils.to_simple_rdd(spark_context, features, labels) + + assert rdd.count() == 5 + first = rdd.first() + first[0].shape == (1,10) + first[1] == 1.0 + + +def test_to_labeled_rdd_categorical(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) + lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True) + assert lp_rdd.count() == 2 + first = lp_rdd.first() + assert first.features.shape == (10,) + assert first.label == 2.0 + + +def test_to_labeled_rdd_not_categorical(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[2.0], [1.0]]) + lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False) + assert lp_rdd.count() == 2 + first = lp_rdd.first() + assert first.features.shape == (10,) + assert first.label == 2.0 + + +def test_from_labeled_rdd(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[2.0], [1.0]]).reshape((2,)) + lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False) + + x, y = rdd_utils.from_labeled_point(lp_rdd, False, None) + assert x.shape == features.shape + assert y.shape == labels.shape + + +def test_from_labeled_rdd_categorical(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) + lp_rdd = 
rdd_utils.to_labeled_point(spark_context, features, labels, True) + + x, y = rdd_utils.from_labeled_point(lp_rdd, True, 3) + assert x.shape == features.shape + assert y.shape == labels.shape + + +def test_encode_label(): + label = 3 + nb_classes = 10 + encoded = rdd_utils.encode_label(label, nb_classes) + assert len(encoded) == nb_classes + for i in range(10): + if i == label: + encoded[i] == 1 + else: + encoded[i] == 0 + + +def test_lp_to_simple_rdd_categorical(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) + lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True) + + rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=True, nb_classes=3) + assert rdd.first()[0].shape == (10,) + assert rdd.first()[1].shape == (3,) + + +def test_lp_to_simple_rdd_not_categorical(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[2.0], [1.0]]).reshape((2,)) + lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False) + + rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=3) + assert rdd.first()[0].shape == (10,) + assert rdd.first()[1] == 2.0 \ No newline at end of file From 07b8d2ab385a52741864e618b2e0524eb4adef5f Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 18:15:08 +0200 Subject: [PATCH 21/57] serialization test --- tests/utils/test_serialization.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/utils/test_serialization.py b/tests/utils/test_serialization.py index e69de29..a9f7319 100644 --- a/tests/utils/test_serialization.py +++ b/tests/utils/test_serialization.py @@ -0,0 +1,17 @@ +import pytest +from keras.models import Sequential +from elephas.utils import serialization + + +def test_model_to_dict(): + model = Sequential() + dict_model = serialization.model_to_dict(model) + assert dict_model.keys() == ['model', 'weights'] + + +def test_dict_to_model(): + model = Sequential() + dict_model = serialization.model_to_dict(model) + + recovered = serialization.dict_to_model(dict_model) + assert recovered.to_json() == model.to_json() From 8b41193d857557c70b64307cd1b7af9c4e1606cb Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 18:35:05 +0200 Subject: [PATCH 22/57] ml adapter tests --- examples/ml_pipeline_otto.py | 5 -- tests/conftest.py | 17 +++++- tests/ml/test_adapter.py | 52 +++++++++++++++++++ .../{test_connector.py => test_client.py} | 0 4 files changed, 68 insertions(+), 6 deletions(-) rename tests/parameter/{test_connector.py => test_client.py} (100%) diff --git a/examples/ml_pipeline_otto.py b/examples/ml_pipeline_otto.py index d093b1c..013740a 100644 --- a/examples/ml_pipeline_otto.py +++ b/examples/ml_pipeline_otto.py @@ -8,14 +8,10 @@ from pyspark import SparkContext, SparkConf from pyspark.sql import SQLContext from pyspark.ml.feature import StringIndexer, StandardScaler -from pyspark.mllib.evaluation import MulticlassMetrics from pyspark.ml import Pipeline from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation -from keras.layers.normalization import BatchNormalization -from keras.layers.advanced_activations import PReLU -from keras.utils import np_utils, generic_utils from elephas.ml_model import ElephasEstimator from elephas import optimizers as elephas_optimizers @@ -94,7 +90,6 @@ def load_data_rdd(csv_file, shuffle=True, train=True): pipeline = Pipeline(stages=[string_indexer, scaler, estimator]) fitted_pipeline = pipeline.fit(train_df) -from 
pyspark.mllib.evaluation import MulticlassMetrics # Evaluate Spark model prediction = fitted_pipeline.transform(train_df) diff --git a/tests/conftest.py b/tests/conftest.py index dc0acd2..cd21eb1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ from pyspark import SparkContext, SparkConf +from pyspark.sql import SQLContext import pytest import logging @@ -11,7 +12,7 @@ def quiet_py4j(): @pytest.fixture(scope="session") def spark_context(request): - """ fixture for creating a spark context + """ fixture for creating a SparkContext Args: request: pytest.FixtureRequest object """ @@ -21,3 +22,17 @@ def spark_context(request): quiet_py4j() return sc + +@pytest.fixture(scope="session") +def sql_context(request): + """ fixture for creating a Spark SQLContext + Args: + request: pytest.FixtureRequest object + """ + conf = (SparkConf().setMaster("local[2]").setAppName("pytest-pyspark-local-testing")) + sc = SparkContext(conf=conf) + sql_context = SQLContext(sc) + request.addfinalizer(lambda: sc.stop()) + + quiet_py4j() + return sql_context \ No newline at end of file diff --git a/tests/ml/test_adapter.py b/tests/ml/test_adapter.py index e69de29..f633013 100644 --- a/tests/ml/test_adapter.py +++ b/tests/ml/test_adapter.py @@ -0,0 +1,52 @@ +import numpy as np +import pytest +pytest.mark.usefixtures("spark_context") +from elephas.ml import adapter + + +def test_to_data_frame(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[2.0], [1.0]]) + + data_frame = adapter.to_data_frame(spark_context, features, labels, categorical=False) + assert data_frame.count() == 2 + +def test_to_data_frame_cat(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) + + data_frame = adapter.to_data_frame(spark_context, features, labels, categorical=True) + assert data_frame.count() == 2 + + +def test_from_data_frame(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[2.0], [1.0]]).reshape((2,)) + + data_frame = adapter.to_data_frame(spark_context, features, labels, categorical=False) + + x, y = adapter.from_data_frame(data_frame, categorical=False) + assert features.shape == x.shape + assert labels.shape == y.shape + + +def test_from_data_frame_cat(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) + + data_frame = adapter.to_data_frame(spark_context, features, labels, categorical=True) + + x, y = adapter.from_data_frame(data_frame, categorical=True, nb_classes=3) + assert features.shape == x.shape + assert labels.shape == y.shape + + + +def test_df_to_simple_rdd(spark_context): + features = np.ones((2, 10)) + labels = np.asarray([[2.0], [1.0]]).reshape((2,)) + + data_frame = adapter.to_data_frame(spark_context, features, labels, categorical=False) + + rdd = adapter.df_to_simple_rdd(data_frame, False) + assert rdd.count() == 2 diff --git a/tests/parameter/test_connector.py b/tests/parameter/test_client.py similarity index 100% rename from tests/parameter/test_connector.py rename to tests/parameter/test_client.py From f79113d483a2fba48a5c5fc71760158227041a86 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 18:39:00 +0200 Subject: [PATCH 23/57] update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 652f4b7..6f1a80a 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ download_url='https://github.com/maxpumperla/elephas/tarball/0.3', author='Max Pumperla', author_email='max.pumperla@googlemail.com', - 
install_requires=['keras', 'hyperas', 'flask', 'six'], + install_requires=['keras', 'tensorflow', 'hyperas', 'flask', 'six', 'pyspark'], license='MIT', packages=find_packages(), zip_safe=False) From e1a7a34a2e3bcc61f22c8a6928edaf686371992f Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 18:47:26 +0200 Subject: [PATCH 24/57] test fix --- tests/ml/test_adapter.py | 4 ++-- tests/utils/test_functional_utils.py | 10 +++++----- tests/utils/test_rdd_utils.py | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/ml/test_adapter.py b/tests/ml/test_adapter.py index f633013..2017db9 100644 --- a/tests/ml/test_adapter.py +++ b/tests/ml/test_adapter.py @@ -1,7 +1,7 @@ import numpy as np +from elephas.ml import adapter import pytest pytest.mark.usefixtures("spark_context") -from elephas.ml import adapter def test_to_data_frame(spark_context): @@ -11,6 +11,7 @@ def test_to_data_frame(spark_context): data_frame = adapter.to_data_frame(spark_context, features, labels, categorical=False) assert data_frame.count() == 2 + def test_to_data_frame_cat(spark_context): features = np.ones((2, 10)) labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) @@ -41,7 +42,6 @@ def test_from_data_frame_cat(spark_context): assert labels.shape == y.shape - def test_df_to_simple_rdd(spark_context): features = np.ones((2, 10)) labels = np.asarray([[2.0], [1.0]]).reshape((2,)) diff --git a/tests/utils/test_functional_utils.py b/tests/utils/test_functional_utils.py index b885e11..a388b91 100644 --- a/tests/utils/test_functional_utils.py +++ b/tests/utils/test_functional_utils.py @@ -6,8 +6,8 @@ def test_add_params(): - p1 = [np.ones((5, 5)) for i in range(10)] - p2 = [np.ones((5, 5)) for i in range(10)] + p1 = [np.ones((5, 5)) for _ in range(10)] + p2 = [np.ones((5, 5)) for _ in range(10)] res = functional_utils.add_params(p1, p2) assert len(res) == 10 @@ -17,8 +17,8 @@ def test_add_params(): def test_subtract_params(): - p1 = [np.ones((5, 5)) for i in range(10)] - p2 = [np.ones((5, 5)) for i in range(10)] + p1 = [np.ones((5, 5)) for _ in range(10)] + p2 = [np.ones((5, 5)) for _ in range(10)] res = functional_utils.subtract_params(p1, p2) @@ -39,4 +39,4 @@ def test_divide_by(): x = [np.ones((3, 4))] res = functional_utils.divide_by(x, num_workers=10) assert res[0].shape == x[0].shape - assert res[0][0, 0] == 0.1 \ No newline at end of file + assert res[0][0, 0] == 0.1 diff --git a/tests/utils/test_rdd_utils.py b/tests/utils/test_rdd_utils.py index 283dd6d..4b96298 100644 --- a/tests/utils/test_rdd_utils.py +++ b/tests/utils/test_rdd_utils.py @@ -12,8 +12,8 @@ def test_to_simple_rdd(spark_context): assert rdd.count() == 5 first = rdd.first() - first[0].shape == (1,10) - first[1] == 1.0 + assert first[0].shape == (10,) + assert first[1] == 1.0 def test_to_labeled_rdd_categorical(spark_context): @@ -63,9 +63,9 @@ def test_encode_label(): assert len(encoded) == nb_classes for i in range(10): if i == label: - encoded[i] == 1 + assert encoded[i] == 1 else: - encoded[i] == 0 + assert encoded[i] == 0 def test_lp_to_simple_rdd_categorical(spark_context): @@ -85,4 +85,4 @@ def test_lp_to_simple_rdd_not_categorical(spark_context): rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=3) assert rdd.first()[0].shape == (10,) - assert rdd.first()[1] == 2.0 \ No newline at end of file + assert rdd.first()[1] == 2.0 From 1b1a55416d645b231a47e1e3e05e1caa1ba3c244 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Mon, 13 Aug 2018 19:01:28 +0200 Subject: [PATCH 25/57] more cleaning --- 
elephas/hyperparam.py | 4 ++-- elephas/ml_model.py | 47 ++++++++++++++++++---------------------- elephas/optimizers.py | 46 ++++++++++++++++++--------------------- elephas/spark_model.py | 24 ++++++++++---------- tests/conftest.py | 3 ++- tests/test_hyperparam.py | 3 ++- 6 files changed, 60 insertions(+), 67 deletions(-) diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index 5a76e4e..308f3b0 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -23,8 +23,8 @@ def __init__(self, sc, num_workers=4): def compute_trials(self, model, data, max_evals): model_string = get_hyperopt_model_string(model=model, data=data, functions=None, notebook_name=None, verbose=False, stack=3) - #bc_model = self.spark_context.broadcast(model_string) - #bc_max_evals = self.spark_context.broadcast(max_evals) + # bc_model = self.spark_context.broadcast(model_string) + # bc_max_evals = self.spark_context.broadcast(max_evals) hyperas_worker = HyperasWorker(model_string, max_evals) dummy_rdd = self.spark_context.parallelize([i for i in range(1, 1000)]) diff --git a/elephas/ml_model.py b/elephas/ml_model.py index 41e8f75..33c4a00 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -2,9 +2,8 @@ import numpy as np -from pyspark.ml.param.shared import HasInputCol, HasOutputCol, HasFeaturesCol, HasLabelCol +from pyspark.ml.param.shared import HasOutputCol, HasFeaturesCol, HasLabelCol from pyspark import keyword_only -from pyspark.sql import Row from pyspark.ml import Estimator, Model from pyspark.sql.types import StringType, DoubleType, StructField @@ -27,14 +26,15 @@ from .optimizers import get -class ElephasEstimator(Estimator, HasCategoricalLabels, HasValidationSplit, HasKerasModelConfig, HasFeaturesCol, HasLabelCol, HasMode, HasEpochs, HasBatchSize, - HasFrequency, HasVerbosity, HasNumberOfClasses, HasNumberOfWorkers, HasOptimizerConfig, HasOutputCol): - ''' +class ElephasEstimator(Estimator, HasCategoricalLabels, HasValidationSplit, HasKerasModelConfig, HasFeaturesCol, + HasLabelCol, HasMode, HasEpochs, HasBatchSize, HasFrequency, HasVerbosity, HasNumberOfClasses, + HasNumberOfWorkers, HasOptimizerConfig, HasOutputCol): + """ SparkML Estimator implementation of an elephas model. This estimator takes all relevant arguments for model compilation and training. Returns a trained model in form of a SparkML Model, which is also a Transformer. - ''' + """ @keyword_only def __init__(self, **kwargs): super(ElephasEstimator, self).__init__() @@ -42,15 +42,13 @@ def __init__(self, **kwargs): @keyword_only def set_params(self, **kwargs): - ''' - Set all provided parameters, otherwise set defaults - ''' + """Set all provided parameters, otherwise set defaults + """ return self._set(**kwargs) def _fit(self, df): - ''' - Private fit method of the Estimator, which trains the model. - ''' + """Private fit method of the Estimator, which trains the model. + """ simple_rdd = df_to_simple_rdd(df, categorical=self.get_categorical_labels(), nb_classes=self.get_nb_classes(), features_col=self.getFeaturesCol(), label_col=self.getLabelCol()) simple_rdd = simple_rdd.repartition(self.get_num_workers()) @@ -76,10 +74,9 @@ def _fit(self, df): class ElephasTransformer(Model, HasKerasModelConfig, HasLabelCol, HasOutputCol): - ''' - SparkML Transformer implementation. Contains a trained model, + """SparkML Transformer implementation. Contains a trained model, with which new feature data can be transformed into labels. 
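# A minimal illustrative sketch (not part of the diff above) of how the estimator
# documented here is typically wired into a SparkML pipeline. Assumes `train_df`
# is a DataFrame with "features"/"label" columns and `model` is a Keras Sequential;
# optimizer and remaining params are left at their defaults and may need tuning.
from pyspark.ml import Pipeline
from elephas.ml_model import ElephasEstimator

estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())  # YAML config, read back via model_from_yaml
estimator.set_categorical_labels(True)
estimator.set_nb_classes(10)
estimator.set_num_workers(4)
estimator.set_nb_epoch(10)
estimator.set_batch_size(32)

pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(train_df)          # yields a PipelineModel wrapping an ElephasTransformer
prediction = fitted_pipeline.transform(train_df)  # adds the configured output column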
- ''' + """ @keyword_only def __init__(self, **kwargs): super(ElephasTransformer, self).__init__() @@ -88,22 +85,20 @@ def __init__(self, **kwargs): @keyword_only def set_params(self, **kwargs): - ''' - Set all provided parameters, otherwise set defaults - ''' + """Set all provided parameters, otherwise set defaults + """ return self._set(**kwargs) def get_model(self): return model_from_yaml(self.get_keras_model_config()) def _transform(self, df): - ''' - Private transform method of a Transformer. This serves as batch-prediction method for our purposes. - ''' - outputCol = self.getOutputCol() - labelCol = self.getLabelCol() + """Private transform method of a Transformer. This serves as batch-prediction method for our purposes. + """ + output_col = self.getOutputCol() + label_col = self.getLabelCol() new_schema = df.schema - new_schema.add(StructField(outputCol, StringType(), True)) + new_schema.add(StructField(output_col, StringType(), True)) rdd = df.rdd.coalesce(1) features = np.asarray(rdd.map(lambda x: from_vector(x.features)).collect()) @@ -118,7 +113,7 @@ def _transform(self, df): # results_rdd = rdd.zip(predictions).map(lambda pair: Row(features=to_vector(pair[0].features), # label=pair[0].label, prediction=float(pair[1]))) results_df = df.sql_ctx.createDataFrame(results_rdd, new_schema) - results_df = results_df.withColumn(outputCol, results_df[outputCol].cast(DoubleType())) - results_df = results_df.withColumn(labelCol, results_df[labelCol].cast(DoubleType())) + results_df = results_df.withColumn(output_col, results_df[output_col].cast(DoubleType())) + results_df = results_df.withColumn(label_col, results_df[label_col].cast(DoubleType())) return results_df diff --git a/elephas/optimizers.py b/elephas/optimizers.py index 11e8725..fbb49e0 100644 --- a/elephas/optimizers.py +++ b/elephas/optimizers.py @@ -1,9 +1,9 @@ -''' +""" This is essentially a copy of keras' optimizers.py. We have to modify the base class 'Optimizer' here, as the gradients will be provided by the Spark workers, not by one of the backends (Theano or Tensorflow). -''' +""" from __future__ import absolute_import from keras import backend as K from keras.optimizers import TFOptimizer @@ -15,38 +15,38 @@ def clip_norm(g, c, n): - ''' Clip gradients ''' + """Clip gradients + """ if c > 0: g = K.switch(K.ge(n, c), g * c / n, g) return g def kl_divergence(p, p_hat): - ''' Kullbach-Leibler divergence ''' + """Kullbach-Leibler divergence """ return p_hat - p + p * K.log(p / p_hat) class Optimizer(object): - ''' - Optimizer for elephas models, adapted from + """Optimizer for elephas models, adapted from respective Keras module. 
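# A minimal illustrative sketch (not part of the diff above) of what an elephas
# optimizer is for: folding worker deltas into the master weights via `get_updates`,
# as the HTTP parameter server does later in this series. `weights` and `delta`
# are assumed to be lists of numpy arrays, e.g. from `model.get_weights()`.
from elephas.optimizers import SGD

elephas_optimizer = SGD(lr=0.01)
constraints = [lambda x: x for _ in weights]  # identity constraint per weight array
weights = elephas_optimizer.get_updates(weights, constraints, delta)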
- ''' + """ def __init__(self, **kwargs): self.__dict__.update(kwargs) self.updates = [] def get_state(self): - ''' Get latest status of optimizer updates ''' + """ Get latest status of optimizer updates """ return [u[0].get_value() for u in self.updates] def set_state(self, value_list): - ''' Set current status of optimizer ''' + """ Set current status of optimizer """ assert len(self.updates) == len(value_list) for u, v in zip(self.updates, value_list): u[0].set_value(v) def get_updates(self, params, constraints, grads): - ''' Compute updates from gradients and constraints ''' + """ Compute updates from gradients and constraints """ raise NotImplementedError def get_gradients(self, grads, params): @@ -61,12 +61,12 @@ def get_gradients(self, grads, params): return K.shared(grads) def get_config(self): - ''' Get configuration dictionary ''' + """ Get configuration dictionary """ return {"class_name": self.__class__.__name__} class SGD(Optimizer): - ''' SGD, optionally with nesterov momentum ''' + """SGD, optionally with nesterov momentum """ def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, *args, **kwargs): super(SGD, self).__init__(**kwargs) @@ -101,9 +101,8 @@ def get_config(self): class RMSprop(Optimizer): - ''' - Reference: www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf - ''' + """Reference: www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf + """ def __init__(self, lr=0.001, rho=0.9, epsilon=1e-6, *args, **kwargs): super(RMSprop, self).__init__(**kwargs) self.__dict__.update(locals()) @@ -131,9 +130,8 @@ def get_config(self): class Adagrad(Optimizer): - ''' - Reference: http://www.magicbroom.info/Papers/DuchiHaSi10.pdf - ''' + """Reference: http://www.magicbroom.info/Papers/DuchiHaSi10.pdf + """ def __init__(self, lr=0.01, epsilon=1e-6, *args, **kwargs): super(Adagrad, self).__init__(**kwargs) self.__dict__.update(locals()) @@ -156,9 +154,8 @@ def get_config(self): class Adadelta(Optimizer): - ''' - Reference: http://arxiv.org/abs/1212.5701 - ''' + """Reference: http://arxiv.org/abs/1212.5701 + """ def __init__(self, lr=1.0, rho=0.95, epsilon=1e-6, *args, **kwargs): super(Adadelta, self).__init__(**kwargs) self.__dict__.update(locals()) @@ -190,10 +187,9 @@ def get_config(self): class Adam(Optimizer): - ''' - Reference: http://arxiv.org/abs/1412.6980v8 - Default parameters follow those provided in the original paper. - ''' + """Reference: http://arxiv.org/abs/1412.6980v8 + Default parameters follow those provided in the original paper. 
+ """ def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, *args, **kwargs): super(Adam, self).__init__(**kwargs) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index b6fea0c..6b45eb5 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -54,26 +54,25 @@ def __init__(self, master_network, optimizer=None, self.parameter_server = SocketServer(model_to_dict(self.master_network)) self.connector = SocketClient() - def get_train_config(self, nb_epoch, batch_size, + @staticmethod + def get_train_config(nb_epoch, batch_size, verbose, validation_split): """Get configuration of training parameters """ - train_config = {} - train_config['nb_epoch'] = nb_epoch - train_config['batch_size'] = batch_size - train_config['verbose'] = verbose - train_config['validation_split'] = validation_split + train_config = {'nb_epoch': nb_epoch, + 'batch_size': batch_size, + 'verbose': verbose, + 'validation_split': validation_split} return train_config def get_config(self): """Get configuration of model parameters """ - model_config = {} - model_config['model'] = self.master_network.get_config() - model_config['optimizer'] = self.optimizer.get_config() - model_config['mode'] = self.mode - model_config['frequency'] = self.frequency - model_config['num_workers'] = self.num_workers + model_config = {'model': self.master_network.get_config(), + 'optimizer': self.optimizer.get_config(), + 'mode': self.mode, + 'frequency': self.frequency, + 'num_workers': self.num_workers} return model_config @property @@ -158,6 +157,7 @@ def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', freq master_loss="categorical_crossentropy", master_metrics=None, custom_objects=None): + # TODO signature is wrong SparkModel.__init__(self, sc, master_network, optimizer, mode, frequency, num_workers, master_optimizer=master_optimizer, master_loss=master_loss, master_metrics=master_metrics, custom_objects=custom_objects) diff --git a/tests/conftest.py b/tests/conftest.py index cd21eb1..df5ee50 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,7 @@ def spark_context(request): quiet_py4j() return sc + @pytest.fixture(scope="session") def sql_context(request): """ fixture for creating a Spark SQLContext @@ -35,4 +36,4 @@ def sql_context(request): request.addfinalizer(lambda: sc.stop()) quiet_py4j() - return sql_context \ No newline at end of file + return sql_context diff --git a/tests/test_hyperparam.py b/tests/test_hyperparam.py index f246195..0387826 100644 --- a/tests/test_hyperparam.py +++ b/tests/test_hyperparam.py @@ -2,6 +2,7 @@ import pytest from hyperopt import STATUS_OK from hyperas.distributions import choice, uniform +import six.moves.cPickle as pickle from elephas.hyperparam import HyperParamModel @@ -52,7 +53,7 @@ def model(X_train, Y_train, X_test, Y_test): show_accuracy=True, verbose=2, validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) + score, acc = model.evaluate(X_test, Y_test, verbose=0) print('Test accuracy:', acc) return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), 'weights': pickle.dumps(model.get_weights())} From 1b9668dc7460c7eabe170044152910c4f2dd80c6 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 10:06:22 +0200 Subject: [PATCH 26/57] docs and various fixes --- elephas/spark_model.py | 67 ++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 
6b45eb5..e02cc05 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -6,26 +6,40 @@ from .utils import lp_to_simple_rdd from .utils import model_to_dict from .mllib import to_matrix, from_matrix, to_vector, from_vector -from .optimizers import SGD as default_optimizer +from .optimizers import SGD from .worker import AsynchronousSparkWorker, SparkWorker from .parameter import HttpServer, SocketServer from .parameter import HttpClient, SocketClient class SparkModel(object): - """SparkModel is the main abstraction of elephas. Every other model - should inherit from it. - """ - # TODO: Eliminate Spark context (only used for first broadcast, can be extracted) + def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', - num_workers=4, - master_optimizer="sgd", # TODO: other default + num_workers=None, + master_optimizer="sgd", master_loss="categorical_crossentropy", master_metrics=None, custom_objects=None, - parameter_server='http', + parameter_server_mode='http', *args, **kwargs): + """SparkModel + + Base class for distributed training on RDDs. Spark model takes a Keras + model as master network, an optimization scheme, a parallelisation mode + and an averaging frequency. + + :param master_network: Keras model (not compiled) + :param optimizer: Elephas optimizer + :param mode: String, choose from `asynchronous`, `synchronous` and `hogwild` + :param frequency: String, either `epoch` or `batch` + :param num_workers: int, number of workers used for training (defaults to None) + :param master_optimizer: Keras optimizer for master network + :param master_loss: Keras loss function for master network + :param master_metrics: Keras metrics used for master network + :param custom_objects: Keras custom objects + :param parameter_server_mode: String, either `http` or `socket` + """ self._master_network = master_network if custom_objects is None: @@ -33,7 +47,7 @@ def __init__(self, master_network, optimizer=None, if master_metrics is None: master_metrics = ["accuracy"] if optimizer is None: - self.optimizer = default_optimizer() + self.optimizer = SGD() else: self.optimizer = optimizer self.mode = mode @@ -45,14 +59,18 @@ def __init__(self, master_network, optimizer=None, self.master_loss = master_loss self.master_metrics = master_metrics self.custom_objects = custom_objects + self.parameter_server_mode = parameter_server_mode - # TODO: clients have to be initialized on workers. Only init servers here, clients on workers - if parameter_server == 'http': + # TODO: clients have to be initialized on workers, too. + if self.parameter_server_mode == 'http': self.parameter_server = HttpServer(self.master_network, self.optimizer, self.mode) - self.connector = HttpClient() - else: + self.client = HttpClient() + elif self.parameter_server_mode == 'socket': self.parameter_server = SocketServer(model_to_dict(self.master_network)) - self.connector = SocketClient() + self.client = SocketClient() + else: + raise ValueError("Parameter server mode has to be either `http` or `socket`, " + "got {}".format(self.parameter_server_mode)) @staticmethod def get_train_config(nb_epoch, batch_size, @@ -104,15 +122,15 @@ def train(self, rdd, nb_epoch=10, batch_size=32, # TODO: Make dataframe the standard, but support RDDs as well """Train an elephas model. 
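# A minimal illustrative sketch (not part of the diff above) of constructing a
# SparkModel with the parameters documented in the docstring. Assumes `model` is
# an uncompiled Keras model and `rdd` an RDD of (features, label) pairs.
from elephas.spark_model import SparkModel
from elephas.optimizers import SGD

spark_model = SparkModel(master_network=model, optimizer=SGD(lr=0.01),
                         mode='asynchronous', frequency='epoch',
                         num_workers=4, parameter_server_mode='http')
spark_model.train(rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1)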
""" - rdd = rdd.repartition(self.num_workers) + if self.num_workers: + rdd = rdd.repartition(self.num_workers) if self.mode in ['asynchronous', 'synchronous', 'hogwild']: self._train(rdd, nb_epoch, batch_size, verbose, validation_split) else: - raise Exception("""Choose from one of the modes: asynchronous, synchronous or hogwild""") + raise Exception("Choose from one of the modes: asynchronous, synchronous or hogwild") - def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, - validation_split=0.1): + def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1): """Protected train method to make wrapping of modes easier """ self.master_network.compile(optimizer=self.master_optimizer, @@ -121,15 +139,14 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, if self.mode in ['asynchronous', 'hogwild']: self.start_server() yaml = self.master_network.to_yaml() - train_config = self.get_train_config(nb_epoch, batch_size, - verbose, validation_split) + train_config = self.get_train_config(nb_epoch, batch_size, verbose, validation_split) if self.mode in ['asynchronous', 'hogwild']: worker = AsynchronousSparkWorker( - yaml, self.connector, train_config, self.frequency, + yaml, self.client, train_config, self.frequency, self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects ) rdd.mapPartitions(worker.train).collect() - new_parameters = self.connector.get_parameters() + new_parameters = self.client.get_parameters() elif self.mode == 'synchronous': parameters = self.master_network.get_weights() worker = SparkWorker( @@ -152,13 +169,13 @@ class SparkMLlibModel(SparkModel): """MLlib model takes RDDs of LabeledPoints. Internally we just convert back to plain old pair RDDs and continue as in SparkModel """ - def __init__(self, sc, master_network, optimizer=None, mode='asynchronous', frequency='epoch', num_workers=4, + def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', num_workers=4, master_optimizer="adam", master_loss="categorical_crossentropy", master_metrics=None, custom_objects=None): - # TODO signature is wrong - SparkModel.__init__(self, sc, master_network, optimizer, mode, frequency, num_workers, + + SparkModel.__init__(self, master_network, optimizer, mode, frequency, num_workers, master_optimizer=master_optimizer, master_loss=master_loss, master_metrics=master_metrics, custom_objects=custom_objects) From a9812d029c780e3fc1643713608451ab235be1ce Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 10:11:16 +0200 Subject: [PATCH 27/57] init clients on worker --- elephas/spark_model.py | 5 ++--- elephas/worker.py | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index e02cc05..f389ff8 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -61,7 +61,6 @@ def __init__(self, master_network, optimizer=None, self.custom_objects = custom_objects self.parameter_server_mode = parameter_server_mode - # TODO: clients have to be initialized on workers, too. 
if self.parameter_server_mode == 'http': self.parameter_server = HttpServer(self.master_network, self.optimizer, self.mode) self.client = HttpClient() @@ -128,7 +127,7 @@ def train(self, rdd, nb_epoch=10, batch_size=32, if self.mode in ['asynchronous', 'synchronous', 'hogwild']: self._train(rdd, nb_epoch, batch_size, verbose, validation_split) else: - raise Exception("Choose from one of the modes: asynchronous, synchronous or hogwild") + raise ValueError("Choose from one of the modes: asynchronous, synchronous or hogwild") def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1): """Protected train method to make wrapping of modes easier @@ -142,7 +141,7 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0. train_config = self.get_train_config(nb_epoch, batch_size, verbose, validation_split) if self.mode in ['asynchronous', 'hogwild']: worker = AsynchronousSparkWorker( - yaml, self.client, train_config, self.frequency, + yaml, self.parameter_server_mode, train_config, self.frequency, self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects ) rdd.mapPartitions(worker.train).collect() diff --git a/elephas/worker.py b/elephas/worker.py index 83b2068..76ddb9e 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -3,7 +3,7 @@ from keras.models import model_from_yaml from .utils import subtract_params -from .parameter import SocketClient +from .parameter import SocketClient, HttpClient class SparkWorker(object): @@ -42,10 +42,19 @@ def train(self, data_iterator): class AsynchronousSparkWorker(object): """Asynchronous Spark worker. This code will be executed on workers. """ - def __init__(self, yaml, client_mode, train_config, frequency, + def __init__(self, yaml, parameter_server_mode, train_config, frequency, master_optimizer, master_loss, master_metrics, custom_objects): self.yaml = yaml + if parameter_server_mode == 'http': + self.client = HttpClient() + elif parameter_server_mode == 'socket': + self.client = SocketClient() + else: + raise ValueError("Parameter server mode has to be either `http` or `socket`, " + "got {}".format(parameter_server_mode)) + + self.client = parameter_server_mode self.train_config = train_config self.frequency = frequency self.master_optimizer = master_optimizer @@ -76,24 +85,23 @@ def train(self, data_iterator): (i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch) ] - self.connector = SocketClient() if self.frequency == 'epoch': for epoch in range(nb_epoch): - weights_before_training = self.connector.get_parameters() + weights_before_training = self.client.get_parameters() model.set_weights(weights_before_training) self.train_config['nb_epoch'] = 1 if x_train.shape[0] > batch_size: model.fit(x_train, y_train, **self.train_config) weights_after_training = model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) - self.connector.update_parameters(deltas) + self.client.update_parameters(deltas) elif self.frequency == 'batch': from keras.engine.training import slice_X for epoch in range(nb_epoch): if x_train.shape[0] > batch_size: for (batch_start, batch_end) in batches: - weights_before_training = self.connector.get_parameters() + weights_before_training = self.client.get_parameters() model.set_weights(weights_before_training) batch_ids = index_array[batch_start:batch_end] X = slice_X(x_train, batch_ids) @@ -101,7 +109,7 @@ def train(self, data_iterator): model.train_on_batch(X, y) weights_after_training = 
model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) - self.connector.update_parameters(deltas) + self.client.update_parameters(deltas) else: raise ValueError('frequency parameter can be `epoch` or `batch, got {}'.format(self.frequency)) yield [] From f96b9557a0a1d24cec9784650b0f59396d192175 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 10:36:59 +0200 Subject: [PATCH 28/57] abc for client, server --- elephas/parameter/client.py | 5 ++++ elephas/parameter/server.py | 12 ++++++-- elephas/spark_model.py | 60 ++++++++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 21 deletions(-) diff --git a/elephas/parameter/client.py b/elephas/parameter/client.py index 2327555..f62d859 100644 --- a/elephas/parameter/client.py +++ b/elephas/parameter/client.py @@ -1,6 +1,7 @@ from __future__ import absolute_import from __future__ import print_function +import abc import numpy as np import socket import six.moves.cPickle as pickle @@ -13,12 +14,16 @@ class BaseParameterClient(object): + __metaclass__ = abc.ABCMeta + def __init__(self): raise NotImplementedError + @abc.abstractmethod def update_parameters(self, delta): raise NotImplementedError + @abc.abstractmethod def get_parameters(self): raise NotImplementedError diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index 4f42415..9f474b6 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -1,3 +1,4 @@ +import abc import socket from threading import Lock, Thread import six.moves.cPickle as pickle @@ -11,14 +12,18 @@ class BaseParameterServer(object): + __metaclass__ = abc.ABCMeta + def __init__(self): raise NotImplementedError + @abc.abstractmethod def start(self): """Start the parameter server instance. """ raise NotImplementedError + @abc.abstractmethod def stop(self): """Terminate the parameter server instance. """ @@ -38,8 +43,9 @@ def __init__(self, master_network, optimizer, mode, port=4000): self.pickled_weights = None self.weights = master_network.get_weights() - def start(self): self.server = Process(target=self.start_flask_service) + + def start(self): self.server.start() self.master_url = determine_master(self.port) @@ -155,7 +161,7 @@ def action_listener(self, conn): while self.runs: get_or_update = conn.recv(1).decode() if get_or_update == 'u': - self.set_parameters(conn) + self.update_parameters(conn) elif get_or_update == 'g': self.get_parameters(conn) else: @@ -169,4 +175,4 @@ def run(self): thread.start() self.connections.append(thread) except Exception: - pass + print("Failed to set up socket connection.") diff --git a/elephas/spark_model.py b/elephas/spark_model.py index f389ff8..c428eaf 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -116,20 +116,28 @@ def predict_classes(self, data): """ return self.master_network.predict_classes(data) - def train(self, rdd, nb_epoch=10, batch_size=32, - verbose=0, validation_split=0.1): - # TODO: Make dataframe the standard, but support RDDs as well - """Train an elephas model. + def fit(self, rdd, epochs=10, batch_size=32, + verbose=0, validation_split=0.1): + """ + Train an elephas model on an RDD. The Keras model configuration as specified + in the elephas model is sent to Spark workers, abd each worker will be trained + on their data partition. 
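# A minimal illustrative sketch (not part of the diff above) matching the `fit`
# signature introduced here. Assumes a SparkContext `sc`, numpy arrays
# `x_train`/`y_train`, and an uncompiled Keras model `model`.
from elephas.spark_model import SparkModel
from elephas.utils import rdd_utils

rdd = rdd_utils.to_simple_rdd(sc, x_train, y_train)
spark_model = SparkModel(model, mode='asynchronous', frequency='epoch')
spark_model.fit(rdd, epochs=10, batch_size=32, verbose=0, validation_split=0.1)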
+ + :param rdd: RDD with features and labels + :param epochs: number of epochs used for training + :param batch_size: batch size used for training + :param verbose: logging verbosity level (0, 1 or 2) + :param validation_split: percentage of data set aside for validation """ if self.num_workers: rdd = rdd.repartition(self.num_workers) if self.mode in ['asynchronous', 'synchronous', 'hogwild']: - self._train(rdd, nb_epoch, batch_size, verbose, validation_split) + self._fit(rdd, epochs, batch_size, verbose, validation_split) else: raise ValueError("Choose from one of the modes: asynchronous, synchronous or hogwild") - def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1): + def _fit(self, rdd, epochs, batch_size, verbose, validation_split): """Protected train method to make wrapping of modes easier """ self.master_network.compile(optimizer=self.master_optimizer, @@ -138,7 +146,10 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0. if self.mode in ['asynchronous', 'hogwild']: self.start_server() yaml = self.master_network.to_yaml() - train_config = self.get_train_config(nb_epoch, batch_size, verbose, validation_split) + train_config = self.get_train_config(epochs, batch_size, verbose, validation_split) + frequency = self.frequency + + if self.mode in ['asynchronous', 'hogwild']: worker = AsynchronousSparkWorker( yaml, self.parameter_server_mode, train_config, self.frequency, @@ -165,18 +176,31 @@ def _train(self, rdd, nb_epoch=10, batch_size=32, verbose=0, validation_split=0. class SparkMLlibModel(SparkModel): - """MLlib model takes RDDs of LabeledPoints. Internally we just convert - back to plain old pair RDDs and continue as in SparkModel - """ + def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', num_workers=4, - master_optimizer="adam", - master_loss="categorical_crossentropy", - master_metrics=None, - custom_objects=None): + master_optimizer="adam", master_loss="categorical_crossentropy", + master_metrics=None, custom_objects=None, parameter_server_mode='http', + *args, **kwargs): + """SparkMLlibModel + + The Spark MLlib model takes RDDs of LabeledPoints for training. 
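# A minimal illustrative sketch (not part of the diff above) of training on a
# LabeledPoint RDD built with the rdd_utils helpers from earlier in this series.
# Assumes `sc`, one-hot encoded numpy arrays `features`/`labels`, and a Keras model `model`.
from elephas.spark_model import SparkMLlibModel
from elephas.utils import rdd_utils

lp_rdd = rdd_utils.to_labeled_point(sc, features, labels, categorical=True)
mllib_model = SparkMLlibModel(model, mode='asynchronous', frequency='epoch')
mllib_model.train(lp_rdd, nb_epoch=10, batch_size=32, categorical=True, nb_classes=10)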
+ + :param master_network: Keras model (not compiled) + :param optimizer: Elephas optimizer + :param mode: String, choose from `asynchronous`, `synchronous` and `hogwild` + :param frequency: String, either `epoch` or `batch` + :param num_workers: int, number of workers used for training (defaults to None) + :param master_optimizer: Keras optimizer for master network + :param master_loss: Keras loss function for master network + :param master_metrics: Keras metrics used for master network + :param custom_objects: Keras custom objects + :param parameter_server_mode: String, either `http` or `socket + """ - SparkModel.__init__(self, master_network, optimizer, mode, frequency, num_workers, - master_optimizer=master_optimizer, master_loss=master_loss, master_metrics=master_metrics, - custom_objects=custom_objects) + SparkModel.__init__(self, master_network=master_network, optimizer=optimizer, mode=mode, frequency=frequency, + num_workers=num_workers, master_optimizer=master_optimizer, master_loss=master_loss, + master_metrics=master_metrics, custom_objects=custom_objects, + parameter_server_mode=parameter_server_mode, *args, **kwargs) def train(self, labeled_points, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1, categorical=False, nb_classes=None): @@ -184,7 +208,7 @@ def train(self, labeled_points, nb_epoch=10, batch_size=32, verbose=0, validatio """ rdd = lp_to_simple_rdd(labeled_points, categorical, nb_classes) rdd = rdd.repartition(self.num_workers) - self._train(rdd, nb_epoch, batch_size, verbose, validation_split) + self._fit(rdd, nb_epoch, batch_size, verbose, validation_split) def predict(self, mllib_data): """Predict probabilities for an RDD of features From 7e97c7ff41f3fcaa47981bc3d0f0fe1fa897c166 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 11:11:54 +0200 Subject: [PATCH 29/57] docs and cleaning for client/server --- elephas/parameter/client.py | 34 ++++++++++++++++------- elephas/parameter/server.py | 54 +++++++++++++++++++++++++++++++++---- elephas/spark_model.py | 1 - 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/elephas/parameter/client.py b/elephas/parameter/client.py index f62d859..accfa13 100644 --- a/elephas/parameter/client.py +++ b/elephas/parameter/client.py @@ -14,6 +14,11 @@ class BaseParameterClient(object): + """BaseParameterClient + + Parameter-server clients can do two things: retrieve the current parameters + from the corresponding server, and send updates (`delta`) to the server. + """ __metaclass__ = abc.ABCMeta def __init__(self): @@ -21,51 +26,60 @@ def __init__(self): @abc.abstractmethod def update_parameters(self, delta): + """Update master parameters with deltas from training process + """ raise NotImplementedError @abc.abstractmethod def get_parameters(self): + """Retrieve master weights from parameter server + """ raise NotImplementedError class HttpClient(BaseParameterClient): + """HttpClient + Uses HTTP protocol for communication with its corresponding parameter server, + namely HttpServer. The HTTP server provides two endpoints, `/parameters` to + get parameters and `/update` to update the server's parameters. 
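# A minimal illustrative sketch (not part of the diff above) of worker-side use of
# the HTTP client described here. Assumes a matching HttpServer is already running
# and `deltas` is a list of numpy arrays.
from elephas.parameter import HttpClient

client = HttpClient(port=4000)
weights = client.get_parameters()   # GET /parameters, un-pickled on arrival
client.update_parameters(deltas)    # POST pickled deltas to /update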
+ """ def __init__(self, port=4000): + BaseParameterClient.__init__(self) + self.master_url = determine_master(port=port) self.headers = {'Content-Type': 'application/elephas'} def get_parameters(self): - """Retrieve master weights from parameter server - """ request = urllib2.Request('http://{}/parameters'.format(self.master_url), headers=self.headers) pickled_weights = urllib2.urlopen(request).read() return pickle.loads(pickled_weights) def update_parameters(self, delta): - """Update master parameters with deltas from training process - """ request = urllib2.Request('http://{}/update'.format(self.master_url), pickle.dumps(delta, -1), headers=self.headers) return urllib2.urlopen(request).read() class SocketClient(BaseParameterClient): + """SocketClient + Uses a socket connection to communicate with an instance of `SocketServer`. + The socket server listens to two types of events. Those with a `g` prefix + indicate a get-request, those with a `u` indicate a parameter update. + """ def __init__(self, host='0.0.0.0', port=4000): + BaseParameterClient.__init__(self) + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.socket.connect((host, port)) def get_parameters(self): self.socket.sendall(b'g') - print('>>> Retrieving weights from socket') return np.asarray(receive(self.socket)) def update_parameters(self, delta): - data = {} - # data['worker_id'] = self.get_worker_id() - data['delta'] = delta + data = {'delta': delta} self.socket.sendall(b'u') - # print('>>> Start sending delta to socket') send(self.socket, data) - # print('>>> Done') diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index 9f474b6..130f6d5 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -12,6 +12,11 @@ class BaseParameterServer(object): + """BaseParameterServer + + Parameter servers can be started and stopped. Server implementations have + to cater to the needs of their respective BaseParameterClient instances. + """ __metaclass__ = abc.ABCMeta def __init__(self): @@ -31,17 +36,43 @@ def stop(self): class HttpServer(BaseParameterServer): + """HttpServer + + Flask HTTP server. Defines two routes, `/parameters` to GET current + parameters held by this server, and `/update` which can be used to + POST updates. + """ + + def __init__(self, model, optimizer, mode, port=4000, debug=True, + threaded=True, use_reloader=False): + """Initializes and HTTP server from a serialized Keras model, elephas optimizer, + a parallelisation mode and a port to run the Flask application on. In + hogwild mode no read- or write-locks will be acquired, in asynchronous + mode this is the case. 
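# A minimal illustrative sketch (not part of the diff above) of starting the
# Flask-based parameter server from a serialized Keras model. Assumes `keras_model`
# is a Keras model; `stop()` is the counterpart declared on BaseParameterServer.
from elephas.parameter import HttpServer
from elephas.optimizers import SGD
from elephas.utils import model_to_dict

server = HttpServer(model_to_dict(keras_model), SGD(lr=0.01), mode='asynchronous', port=4000)
server.start()
# ... workers train and exchange parameters ...
server.stop()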
+ + :param model: Serialized Keras model + :param optimizer: Elephas optimizer + :param mode: parallelization mode, either `asynchronous` or `hogwild` + :param port: int, port to run the application on + :param debug: boolean, Flask debug mode + :param threaded: boolean, Flask threaded application mode + :param use_reloader: boolean, Flask `use_reloader` argument + """ + BaseParameterServer.__init__(self) - def __init__(self, master_network, optimizer, mode, port=4000): - self.master_network = master_network + self.master_network = dict_to_model(model) self.mode = mode self.master_url = None self.optimizer = optimizer + self.port = port + self.debug = debug + self.threaded = threaded + self.use_reloader = use_reloader self.lock = Lock() self.pickled_weights = None - self.weights = master_network.get_weights() + self.weights = self.master_network.get_weights() self.server = Process(target=self.start_flask_service) @@ -94,13 +125,26 @@ def empty(a): self.lock.release() return 'Update done' - self.app.run(host='0.0.0.0', debug=True, port=self.port, - threaded=True, use_reloader=False) + self.app.run(host='0.0.0.0', debug=self.debug, port=self.port, + threaded=self.threaded, use_reloader=self.use_reloader) class SocketServer(BaseParameterServer): + """SocketServer + + A basic Python socket server + + """ def __init__(self, model, port=4000): + """Initializes a Socket server instance from a serializer Keras model + and a port to listen to. + + :param model: Serialized Keras model + :param port: int, port to run the socket on + """ + BaseParameterServer.__init__(self) + self.model = dict_to_model(model) self.port = port self.socket = None diff --git a/elephas/spark_model.py b/elephas/spark_model.py index c428eaf..cbaa412 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -149,7 +149,6 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): train_config = self.get_train_config(epochs, batch_size, verbose, validation_split) frequency = self.frequency - if self.mode in ['asynchronous', 'hogwild']: worker = AsynchronousSparkWorker( yaml, self.parameter_server_mode, train_config, self.frequency, From 69c4217ba9abf490bfd8940bc432c4ad6f672c51 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 11:24:46 +0200 Subject: [PATCH 30/57] fix serialization --- elephas/spark_model.py | 21 +++++++------------- elephas/worker.py | 45 ++++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index cbaa412..d516c20 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -61,11 +61,12 @@ def __init__(self, master_network, optimizer=None, self.custom_objects = custom_objects self.parameter_server_mode = parameter_server_mode + self.serialized_model = model_to_dict(self.master_network) if self.parameter_server_mode == 'http': - self.parameter_server = HttpServer(self.master_network, self.optimizer, self.mode) + self.parameter_server = HttpServer(self.serialized_model, self.optimizer, self.mode) self.client = HttpClient() elif self.parameter_server_mode == 'socket': - self.parameter_server = SocketServer(model_to_dict(self.master_network)) + self.parameter_server = SocketServer(self.serialized_model) self.client = SocketClient() else: raise ValueError("Parameter server mode has to be either `http` or `socket`, " @@ -145,23 +146,16 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): metrics=self.master_metrics) if self.mode in ['asynchronous', 
'hogwild']: self.start_server() - yaml = self.master_network.to_yaml() train_config = self.get_train_config(epochs, batch_size, verbose, validation_split) - frequency = self.frequency if self.mode in ['asynchronous', 'hogwild']: - worker = AsynchronousSparkWorker( - yaml, self.parameter_server_mode, train_config, self.frequency, - self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects - ) + worker = AsynchronousSparkWorker(self.parameter_server_mode, train_config, self.frequency, + self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects) rdd.mapPartitions(worker.train).collect() new_parameters = self.client.get_parameters() elif self.mode == 'synchronous': - parameters = self.master_network.get_weights() - worker = SparkWorker( - yaml, parameters, train_config, - self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects - ) + worker = SparkWorker(self.serialized_model, train_config, self.master_optimizer, self.master_loss, + self.master_metrics, self.custom_objects) deltas = rdd.mapPartitions(worker.train).collect() new_parameters = self.master_network.get_weights() for delta in deltas: @@ -195,7 +189,6 @@ def __init__(self, master_network, optimizer=None, mode='asynchronous', frequenc :param custom_objects: Keras custom objects :param parameter_server_mode: String, either `http` or `socket """ - SparkModel.__init__(self, master_network=master_network, optimizer=optimizer, mode=mode, frequency=frequency, num_workers=num_workers, master_optimizer=master_optimizer, master_loss=master_loss, master_metrics=master_metrics, custom_objects=custom_objects, diff --git a/elephas/worker.py b/elephas/worker.py index 76ddb9e..2f6f375 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -1,7 +1,7 @@ import numpy as np from itertools import tee -from keras.models import model_from_yaml +from .utils.serialization import dict_to_model from .utils import subtract_params from .parameter import SocketClient, HttpClient @@ -9,15 +9,14 @@ class SparkWorker(object): """Synchronous Spark worker. This code will be executed on workers. 
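# A minimal illustrative sketch (not part of the diff above) of the delta
# bookkeeping behind the synchronous flow, shown with the functional utils on toy
# arrays; the actual master-side update is handled in `_fit` and not reproduced here.
import numpy as np
from elephas.utils import functional_utils

before = [np.ones((5, 5))]
after = [np.ones((5, 5)) * 0.5]
delta = functional_utils.subtract_params(before, after)    # what a worker yields
scaled = functional_utils.divide_by(delta, num_workers=4)  # scale by worker count
merged = functional_utils.add_params(before, scaled)       # fold back into weights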
""" - def __init__(self, yaml, parameters, train_config, master_optimizer, + def __init__(self, serialized_model, train_config, master_optimizer, master_loss, master_metrics, custom_objects): - self.yaml = yaml - self.parameters = parameters + # TODO handle custom_objects + self.model = dict_to_model(serialized_model) self.train_config = train_config self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics - self.custom_objects = custom_objects def train(self, data_iterator): """Train a keras model on a worker @@ -26,15 +25,11 @@ def train(self, data_iterator): x_train = np.asarray([x for x, y in feature_iterator]) y_train = np.asarray([y for x, y in label_iterator]) - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, - loss=self.master_loss, - metrics=self.master_metrics) - model.set_weights(self.parameters) - weights_before_training = model.get_weights() + self.model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + weights_before_training = self.model.get_weights() if x_train.shape[0] > self.train_config.get('batch_size'): - model.fit(x_train, y_train, **self.train_config) - weights_after_training = model.get_weights() + self.model.fit(x_train, y_train, **self.train_config) + weights_after_training = self.model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) yield deltas @@ -42,10 +37,10 @@ def train(self, data_iterator): class AsynchronousSparkWorker(object): """Asynchronous Spark worker. This code will be executed on workers. """ - def __init__(self, yaml, parameter_server_mode, train_config, frequency, - master_optimizer, master_loss, master_metrics, - custom_objects): - self.yaml = yaml + def __init__(self, serialized_model, parameter_server_mode, train_config, frequency, + master_optimizer, master_loss, master_metrics, custom_objects): + # TODO handle custom_objects + self.model = dict_to_model(serialized_model) if parameter_server_mode == 'http': self.client = HttpClient() elif parameter_server_mode == 'socket': @@ -60,7 +55,6 @@ def __init__(self, yaml, parameter_server_mode, train_config, frequency, self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics - self.custom_objects = custom_objects def train(self, data_iterator): """Train a keras model on a worker and send asynchronous updates @@ -73,8 +67,7 @@ def train(self, data_iterator): if x_train.size == 0: return - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + self.model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) nb_epoch = self.train_config['nb_epoch'] batch_size = self.train_config.get('batch_size') @@ -89,11 +82,11 @@ def train(self, data_iterator): if self.frequency == 'epoch': for epoch in range(nb_epoch): weights_before_training = self.client.get_parameters() - model.set_weights(weights_before_training) + self.model.set_weights(weights_before_training) self.train_config['nb_epoch'] = 1 if x_train.shape[0] > batch_size: - model.fit(x_train, y_train, **self.train_config) - weights_after_training = model.get_weights() + self.model.fit(x_train, y_train, **self.train_config) + weights_after_training = self.model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) 
self.client.update_parameters(deltas) elif self.frequency == 'batch': @@ -102,12 +95,12 @@ def train(self, data_iterator): if x_train.shape[0] > batch_size: for (batch_start, batch_end) in batches: weights_before_training = self.client.get_parameters() - model.set_weights(weights_before_training) + self.model.set_weights(weights_before_training) batch_ids = index_array[batch_start:batch_end] X = slice_X(x_train, batch_ids) y = slice_X(y_train, batch_ids) - model.train_on_batch(X, y) - weights_after_training = model.get_weights() + self.model.train_on_batch(X, y) + weights_after_training = self.model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) self.client.update_parameters(deltas) else: From 6f6007971eced8b5b88b200646de982597e8d727 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 11:27:31 +0200 Subject: [PATCH 31/57] asynch worker signature --- elephas/spark_model.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index d516c20..2fb5fdb 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -14,15 +14,9 @@ class SparkModel(object): - def __init__(self, master_network, optimizer=None, - mode='asynchronous', frequency='epoch', - num_workers=None, - master_optimizer="sgd", - master_loss="categorical_crossentropy", - master_metrics=None, - custom_objects=None, - parameter_server_mode='http', - *args, **kwargs): + def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', + num_workers=None, master_optimizer="sgd", master_loss="categorical_crossentropy", + master_metrics=None, custom_objects=None, parameter_server_mode='http', *args, **kwargs): """SparkModel Base class for distributed training on RDDs. 
Spark model takes a Keras @@ -149,8 +143,9 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): train_config = self.get_train_config(epochs, batch_size, verbose, validation_split) if self.mode in ['asynchronous', 'hogwild']: - worker = AsynchronousSparkWorker(self.parameter_server_mode, train_config, self.frequency, - self.master_optimizer, self.master_loss, self.master_metrics, self.custom_objects) + worker = AsynchronousSparkWorker(self.serialized_model, self.parameter_server_mode, train_config, + self.frequency, self.master_optimizer, self.master_loss, + self.master_metrics, self.custom_objects) rdd.mapPartitions(worker.train).collect() new_parameters = self.client.get_parameters() elif self.mode == 'synchronous': From 31f14b9cc3045a1aafecd3f16d4d6527227aa937 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 12:20:26 +0200 Subject: [PATCH 32/57] sync latest PRs, fix ML model signature --- elephas/ml_model.py | 2 +- elephas/parameter/server.py | 13 +++++++------ elephas/worker.py | 5 ++++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/elephas/ml_model.py b/elephas/ml_model.py index 33c4a00..bf2d9d6 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -59,7 +59,7 @@ def _fit(self, df): keras_model = model_from_yaml(self.get_keras_model_config()) - spark_model = SparkModel(simple_rdd.ctx, keras_model, optimizer=optimizer, + spark_model = SparkModel(keras_model, optimizer=optimizer, mode=self.get_mode(), frequency=self.get_frequency(), num_workers=self.get_num_workers()) spark_model.train(simple_rdd, nb_epoch=self.get_nb_epoch(), batch_size=self.get_batch_size(), diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index 130f6d5..bd6470d 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -44,7 +44,7 @@ class HttpServer(BaseParameterServer): """ def __init__(self, model, optimizer, mode, port=4000, debug=True, - threaded=True, use_reloader=False): + threaded=True, use_reloader=False): """Initializes and HTTP server from a serialized Keras model, elephas optimizer, a parallelisation mode and a port to run the Flask application on. 
In hogwild mode no read- or write-locks will be acquired, in asynchronous @@ -115,11 +115,12 @@ def handle_update_parameters(): delta = pickle.loads(request.data) if self.mode == 'asynchronous': self.lock.acquire_write() - constraints = self.master_network.constraints - if len(constraints) == 0: - def empty(a): - return a - constraints = [empty for x in self.weights] + + if not self.master_network.built: + self.master_network.build() + + base_constraint = lambda a: a + constraints = [base_constraint for _ in self.weights] self.weights = self.optimizer.get_updates(self.weights, constraints, delta) if self.mode == 'asynchronous': self.lock.release() diff --git a/elephas/worker.py b/elephas/worker.py index 2f6f375..4b99baf 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -83,9 +83,12 @@ def train(self, data_iterator): for epoch in range(nb_epoch): weights_before_training = self.client.get_parameters() self.model.set_weights(weights_before_training) - self.train_config['nb_epoch'] = 1 + self.train_config['epochs'] = 1 + self.train_config['nb_epoch'] = 1 # legacy support if x_train.shape[0] > batch_size: self.model.fit(x_train, y_train, **self.train_config) + self.train_config['epochs'] = nb_epoch + self.train_config['nb_epoch'] = nb_epoch weights_after_training = self.model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) self.client.update_parameters(deltas) From b570aa54229c9ef3fca68c3a6c0e0b02ebd562d6 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 12:27:02 +0200 Subject: [PATCH 33/57] replace slice_X --- elephas/worker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/elephas/worker.py b/elephas/worker.py index 4b99baf..748e6da 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -1,5 +1,6 @@ import numpy as np from itertools import tee +from keras.utils.generic_utils import slice_arrays from .utils.serialization import dict_to_model from .utils import subtract_params @@ -93,16 +94,15 @@ def train(self, data_iterator): deltas = subtract_params(weights_before_training, weights_after_training) self.client.update_parameters(deltas) elif self.frequency == 'batch': - from keras.engine.training import slice_X for epoch in range(nb_epoch): if x_train.shape[0] > batch_size: for (batch_start, batch_end) in batches: weights_before_training = self.client.get_parameters() self.model.set_weights(weights_before_training) batch_ids = index_array[batch_start:batch_end] - X = slice_X(x_train, batch_ids) - y = slice_X(y_train, batch_ids) - self.model.train_on_batch(X, y) + x = slice_arrays(x_train, batch_ids) + y = slice_arrays(y_train, batch_ids) + self.model.train_on_batch(x, y) weights_after_training = self.model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) self.client.update_parameters(deltas) From 328b026cf5525b716bb41f26922183b35d3075b8 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 13:51:48 +0200 Subject: [PATCH 34/57] test ml params --- tests/ml/test_params.py | 87 ++++++++++++++++++++++++++++++++++ tests/mllib/test_adapter.py | 1 + tests/parameter/test_client.py | 1 + tests/parameter/test_server.py | 1 + tests/test_ml_model.py | 1 + tests/test_optimizers.py | 1 + tests/test_parameter.py | 0 tests/test_spark_model.py | 1 + tests/test_worker.py | 1 + tests/utils/test_rwlock.py | 1 + tests/utils/test_sockets.py | 1 + 11 files changed, 96 insertions(+) delete mode 100644 tests/test_parameter.py diff --git a/tests/ml/test_params.py 
b/tests/ml/test_params.py index e69de29..e9baecb 100644 --- a/tests/ml/test_params.py +++ b/tests/ml/test_params.py @@ -0,0 +1,87 @@ +from elephas.ml.params import * + + +def test_has_keras_model_config(): + param = HasKerasModelConfig() + conf = {"foo": "bar"} + param.set_keras_model_config(conf) + assert conf == param.get_keras_model_config() + + +def test_has_optimizer_config(): + param = HasOptimizerConfig() + conf = {"foo": "bar"} + param.set_optimizer_config(conf) + assert conf == param.get_optimizer_config() + + +def test_has_mode(): + param = HasMode() + assert param.get_mode() == "asynchronous" + mode = "foobar" + param.set_mode(mode) + assert param.get_mode() == mode + + +def test_has_frequency(): + param = HasFrequency() + assert param.get_frequency() == "epoch" + freq = "foobar" + param.set_frequency(freq) + assert param.get_frequency() == freq + + +def test_has_number_of_classes(): + param = HasNumberOfClasses() + assert param.get_nb_classes() == 10 + classes = 42 + param.set_nb_classes(classes) + assert param.get_nb_classes() == classes + + +def test_has_categorical_labels(): + param = HasCategoricalLabels() + assert param.get_categorical_labels() + has_labels = False + param.set_categorical_labels(has_labels) + assert param.get_categorical_labels() == has_labels + + +def test_has_epochs(): + param = HasEpochs() + assert param.get_nb_epoch() == 10 + epochs = 42 + param.set_nb_epoch(epochs) + assert param.get_nb_epoch() == epochs + + +def test_has_batch_size(): + param = HasBatchSize() + assert param.get_batch_size() == 32 + bs = 42 + param.set_batch_size(bs) + assert param.get_batch_size() == bs + + +def test_has_verbosity(): + param = HasVerbosity() + assert param.get_verbosity() == 0 + verbosity = 2 + param.set_verbosity(verbosity) + assert param.get_verbosity() == verbosity + + +def test_has_validation_split(): + param = HasValidationSplit() + assert param.get_validation_split() == 0.1 + split = 0.5 + param.set_validation_split(split) + assert param.get_validation_split() == split + + +def test_has_number_of_workers(): + param = HasNumberOfWorkers() + assert param.get_num_workers() == 8 + workers = 12 + param.set_num_workers(workers) + assert param.get_num_workers() == workers \ No newline at end of file diff --git a/tests/mllib/test_adapter.py b/tests/mllib/test_adapter.py index e69de29..b250f14 100644 --- a/tests/mllib/test_adapter.py +++ b/tests/mllib/test_adapter.py @@ -0,0 +1 @@ +# TODO test mllib adapter \ No newline at end of file diff --git a/tests/parameter/test_client.py b/tests/parameter/test_client.py index e69de29..0cbdcac 100644 --- a/tests/parameter/test_client.py +++ b/tests/parameter/test_client.py @@ -0,0 +1 @@ +# TODO test clients \ No newline at end of file diff --git a/tests/parameter/test_server.py b/tests/parameter/test_server.py index e69de29..4c0e0a7 100644 --- a/tests/parameter/test_server.py +++ b/tests/parameter/test_server.py @@ -0,0 +1 @@ +# TODO test servers \ No newline at end of file diff --git a/tests/test_ml_model.py b/tests/test_ml_model.py index e69de29..1d7d70a 100644 --- a/tests/test_ml_model.py +++ b/tests/test_ml_model.py @@ -0,0 +1 @@ +# TODO test basic ml model \ No newline at end of file diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py index e69de29..8e8be9f 100644 --- a/tests/test_optimizers.py +++ b/tests/test_optimizers.py @@ -0,0 +1 @@ +# TODO: test optimizers \ No newline at end of file diff --git a/tests/test_parameter.py b/tests/test_parameter.py deleted file mode 100644 index e69de29..0000000 diff --git 
a/tests/test_spark_model.py b/tests/test_spark_model.py index e69de29..2a3d6ea 100644 --- a/tests/test_spark_model.py +++ b/tests/test_spark_model.py @@ -0,0 +1 @@ +# TODO test basic spark model \ No newline at end of file diff --git a/tests/test_worker.py b/tests/test_worker.py index e69de29..6ab8963 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -0,0 +1 @@ +# TODO test workers \ No newline at end of file diff --git a/tests/utils/test_rwlock.py b/tests/utils/test_rwlock.py index e69de29..2bc8f73 100644 --- a/tests/utils/test_rwlock.py +++ b/tests/utils/test_rwlock.py @@ -0,0 +1 @@ +# TODO test lock \ No newline at end of file diff --git a/tests/utils/test_sockets.py b/tests/utils/test_sockets.py index e69de29..253355a 100644 --- a/tests/utils/test_sockets.py +++ b/tests/utils/test_sockets.py @@ -0,0 +1 @@ +# TODO test sockets \ No newline at end of file From 10854142288986ffe51d48fad4713f69a8f5af8b Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 14:00:33 +0200 Subject: [PATCH 35/57] test mllib adapter --- tests/mllib/test_adapter.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/mllib/test_adapter.py b/tests/mllib/test_adapter.py index b250f14..3956aac 100644 --- a/tests/mllib/test_adapter.py +++ b/tests/mllib/test_adapter.py @@ -1 +1,28 @@ -# TODO test mllib adapter \ No newline at end of file +import numpy as np +from elephas.mllib.adapter import * +from pyspark.mllib.linalg import Matrices, Vectors + + +def test_to_matrix(): + x = np.ones((4, 2)) + mat = to_matrix(x) + assert mat.numRows == 4 + assert mat.numCols == 2 + + +def test_from_matrix(): + mat = Matrices.dense(1, 2, [13, 37]) + x = from_matrix(mat) + assert x.shape == (1, 2) + + +def test_from_vector(): + x = np.ones((3,)) + vector = to_vector(x) + assert len(vector) == 3 + + +def test_to_vector(): + vector = Vectors.dense([4, 2]) + x = from_vector(vector) + assert x.shape == (2,) \ No newline at end of file From 2a421aa48d12c7cd483194ea2958ff57685bf0c1 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 14:35:34 +0200 Subject: [PATCH 36/57] test --- elephas/parameter/client.py | 2 - elephas/parameter/server.py | 2 - elephas/spark_model.py | 17 ++++++--- elephas/utils/sockets.py | 2 +- elephas/worker.py | 11 +++++- examples/mnist_mlp_spark.py | 3 +- tests/test_spark_model.py | 73 ++++++++++++++++++++++++++++++++++++- 7 files changed, 95 insertions(+), 15 deletions(-) diff --git a/elephas/parameter/client.py b/elephas/parameter/client.py index accfa13..997b082 100644 --- a/elephas/parameter/client.py +++ b/elephas/parameter/client.py @@ -45,7 +45,6 @@ class HttpClient(BaseParameterClient): get parameters and `/update` to update the server's parameters. """ def __init__(self, port=4000): - BaseParameterClient.__init__(self) self.master_url = determine_master(port=port) self.headers = {'Content-Type': 'application/elephas'} @@ -70,7 +69,6 @@ class SocketClient(BaseParameterClient): indicate a get-request, those with a `u` indicate a parameter update. 
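A minimal sketch of the worker-side round trip against either client, using only the calls that appear in `worker.py` in this series; the compiled Keras `model` and the `deltas` produced by `subtract_params` are assumed to be in scope:

```
# sketch: the same two calls serve HttpClient and SocketClient alike
client = SocketClient(host='0.0.0.0', port=4000)   # or HttpClient(port=4000)
weights = client.get_parameters()       # pull the current master weights
model.set_weights(weights)
# ... train locally, compute deltas via subtract_params, then push them back:
client.update_parameters(deltas)
```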
""" def __init__(self, host='0.0.0.0', port=4000): - BaseParameterClient.__init__(self) self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.socket.connect((host, port)) diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index bd6470d..818eb1b 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -58,7 +58,6 @@ def __init__(self, model, optimizer, mode, port=4000, debug=True, :param threaded: boolean, Flask threaded application mode :param use_reloader: boolean, Flask `use_reloader` argument """ - BaseParameterServer.__init__(self) self.master_network = dict_to_model(model) self.mode = mode @@ -144,7 +143,6 @@ def __init__(self, model, port=4000): :param model: Serialized Keras model :param port: int, port to run the socket on """ - BaseParameterServer.__init__(self) self.model = dict_to_model(model) self.port = port diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 2fb5fdb..deaf4fd 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -141,16 +141,23 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): if self.mode in ['asynchronous', 'hogwild']: self.start_server() train_config = self.get_train_config(epochs, batch_size, verbose, validation_split) + model = self.serialized_model + mode = self.parameter_server_mode + freq = self.frequency + optimizer = self.master_optimizer + loss = self.master_loss + metrics = self.master_metrics + custom = self.custom_objects if self.mode in ['asynchronous', 'hogwild']: - worker = AsynchronousSparkWorker(self.serialized_model, self.parameter_server_mode, train_config, - self.frequency, self.master_optimizer, self.master_loss, - self.master_metrics, self.custom_objects) + worker = AsynchronousSparkWorker(model, mode, train_config, freq, optimizer, loss,metrics, custom) rdd.mapPartitions(worker.train).collect() new_parameters = self.client.get_parameters() elif self.mode == 'synchronous': - worker = SparkWorker(self.serialized_model, train_config, self.master_optimizer, self.master_loss, - self.master_metrics, self.custom_objects) + yaml = self.master_network.to_yaml() + init = self.master_network.get_weights() + parameters = rdd.context.broadcast(init) + worker = SparkWorker(yaml, parameters, train_config, optimizer, loss, metrics, custom) deltas = rdd.mapPartitions(worker.train).collect() new_parameters = self.master_network.get_weights() for delta in deltas: diff --git a/elephas/utils/sockets.py b/elephas/utils/sockets.py index 5f2c544..abee72f 100644 --- a/elephas/utils/sockets.py +++ b/elephas/utils/sockets.py @@ -9,7 +9,7 @@ def determine_master(port=':4000'): :param port: port on which the application runs :return: Master address """ - return gethostbyname(gethostname()) + port + return str(gethostbyname(gethostname())) + str(port) def _receive_all(socket, num_bytes): diff --git a/elephas/worker.py b/elephas/worker.py index 748e6da..0a17782 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -1,6 +1,7 @@ import numpy as np from itertools import tee from keras.utils.generic_utils import slice_arrays +from keras.models import model_from_yaml from .utils.serialization import dict_to_model from .utils import subtract_params @@ -10,18 +11,24 @@ class SparkWorker(object): """Synchronous Spark worker. This code will be executed on workers. 
""" - def __init__(self, serialized_model, train_config, master_optimizer, + def __init__(self, yaml, parameters, train_config, master_optimizer, master_loss, master_metrics, custom_objects): # TODO handle custom_objects - self.model = dict_to_model(serialized_model) + self.yaml = yaml + self.parameters = parameters self.train_config = train_config self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics + self.custom_objects = custom_objects def train(self, data_iterator): """Train a keras model on a worker """ + model = model_from_yaml(self.yaml, self.custom_objects) + model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + model.set_weights(self.parameters.value) + feature_iterator, label_iterator = tee(data_iterator, 2) x_train = np.asarray([x for x, y in feature_iterator]) y_train = np.asarray([y for x, y in label_iterator]) diff --git a/examples/mnist_mlp_spark.py b/examples/mnist_mlp_spark.py index 3f06c7d..0e86a4d 100644 --- a/examples/mnist_mlp_spark.py +++ b/examples/mnist_mlp_spark.py @@ -55,8 +55,7 @@ # Initialize SparkModel from Keras model and Spark context adagrad = elephas_optimizers.Adagrad() -spark_model = SparkModel(sc, - model, +spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', mode='asynchronous', diff --git a/tests/test_spark_model.py b/tests/test_spark_model.py index 2a3d6ea..ecc52dc 100644 --- a/tests/test_spark_model.py +++ b/tests/test_spark_model.py @@ -1 +1,72 @@ -# TODO test basic spark model \ No newline at end of file +from __future__ import absolute_import +from __future__ import print_function + +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Activation +from keras.optimizers import SGD +from keras.utils import np_utils + +from elephas.spark_model import SparkModel +from elephas.utils.rdd_utils import to_simple_rdd +from elephas import optimizers as elephas_optimizers + +from pyspark import SparkContext, SparkConf + + +def test_spark_model(): + # Define basic parameters + batch_size = 64 + nb_classes = 10 + epochs = 1 + + # Create Spark context + conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') + sc = SparkContext(conf=conf) + + # Load data + (x_train, y_train), (x_test, y_test) = mnist.load_data() + + x_train = x_train.reshape(60000, 784) + x_test = x_test.reshape(10000, 784) + x_train = x_train.astype("float32") + x_test = x_test.astype("float32") + x_train /= 255 + x_test /= 255 + print(x_train.shape[0], 'train samples') + print(x_test.shape[0], 'test samples') + + # Convert class vectors to binary class matrices + y_train = np_utils.to_categorical(y_train, nb_classes) + y_test = np_utils.to_categorical(y_test, nb_classes) + + model = Sequential() + model.add(Dense(128, input_dim=784)) + model.add(Activation('relu')) + model.add(Dropout(0.2)) + model.add(Dense(128)) + model.add(Activation('relu')) + model.add(Dropout(0.2)) + model.add(Dense(10)) + model.add(Activation('softmax')) + + sgd = SGD(lr=0.1) + + # Build RDD from numpy features and labels + rdd = to_simple_rdd(sc, x_train, y_train) + + # Initialize SparkModel from Keras model and Spark context + adagrad = elephas_optimizers.Adagrad() + spark_model = SparkModel(model, + optimizer=adagrad, + frequency='epoch', + mode='synchronous', + num_workers=2, + master_optimizer=sgd) + + # Train Spark model + spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) + + 
# Evaluate Spark model by evaluating the underlying model + score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) + print('Test accuracy:', score[1]) From d13e83524c825058159843520c39c9943d9ae5e1 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 15:12:46 +0200 Subject: [PATCH 37/57] spark ml model runs again --- elephas/parameter/server.py | 1 + elephas/spark_model.py | 14 ++++++++------ elephas/worker.py | 23 ++++++++++++++--------- examples/mnist_mlp_spark.py | 4 ++-- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/elephas/parameter/server.py b/elephas/parameter/server.py index 818eb1b..b25b9ff 100644 --- a/elephas/parameter/server.py +++ b/elephas/parameter/server.py @@ -8,6 +8,7 @@ from ..utils.sockets import determine_master from ..utils.sockets import receive, send from ..utils.serialization import dict_to_model +# from multiprocessing import Lock from ..utils.rwlock import RWLock as Lock diff --git a/elephas/spark_model.py b/elephas/spark_model.py index deaf4fd..96d85c9 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -141,7 +141,6 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): if self.mode in ['asynchronous', 'hogwild']: self.start_server() train_config = self.get_train_config(epochs, batch_size, verbose, validation_split) - model = self.serialized_model mode = self.parameter_server_mode freq = self.frequency optimizer = self.master_optimizer @@ -149,19 +148,22 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): metrics = self.master_metrics custom = self.custom_objects + yaml = self.master_network.to_yaml() + init = self.master_network.get_weights() + parameters = rdd.context.broadcast(init) + if self.mode in ['asynchronous', 'hogwild']: - worker = AsynchronousSparkWorker(model, mode, train_config, freq, optimizer, loss,metrics, custom) + worker = AsynchronousSparkWorker(yaml, parameters, mode, train_config, freq, optimizer, loss, metrics, custom) rdd.mapPartitions(worker.train).collect() new_parameters = self.client.get_parameters() elif self.mode == 'synchronous': - yaml = self.master_network.to_yaml() - init = self.master_network.get_weights() - parameters = rdd.context.broadcast(init) + worker = SparkWorker(yaml, parameters, train_config, optimizer, loss, metrics, custom) deltas = rdd.mapPartitions(worker.train).collect() new_parameters = self.master_network.get_weights() for delta in deltas: - constraints = self.master_network.constraints + base_constraint = lambda a: a + constraints = [base_constraint for _ in self.weights] new_parameters = self.optimizer.get_updates(self.weights, constraints, delta) else: raise ValueError("Unsupported mode {}".format(self.mode)) diff --git a/elephas/worker.py b/elephas/worker.py index 0a17782..ef4b7cf 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -3,7 +3,6 @@ from keras.utils.generic_utils import slice_arrays from keras.models import model_from_yaml -from .utils.serialization import dict_to_model from .utils import subtract_params from .parameter import SocketClient, HttpClient @@ -13,21 +12,21 @@ class SparkWorker(object): """ def __init__(self, yaml, parameters, train_config, master_optimizer, master_loss, master_metrics, custom_objects): - # TODO handle custom_objects self.yaml = yaml self.parameters = parameters self.train_config = train_config - self.master_optimizer = master_optimizer + self.master_optimizer = "sgd" # TODO self.master_loss = master_loss self.master_metrics = master_metrics self.custom_objects = 
custom_objects + self.model = None def train(self, data_iterator): """Train a keras model on a worker """ - model = model_from_yaml(self.yaml, self.custom_objects) - model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) - model.set_weights(self.parameters.value) + self.model = model_from_yaml(self.yaml, self.custom_objects) + self.model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + self.model.set_weights(self.parameters.value) feature_iterator, label_iterator = tee(data_iterator, 2) x_train = np.asarray([x for x, y in feature_iterator]) @@ -45,10 +44,9 @@ def train(self, data_iterator): class AsynchronousSparkWorker(object): """Asynchronous Spark worker. This code will be executed on workers. """ - def __init__(self, serialized_model, parameter_server_mode, train_config, frequency, + def __init__(self, yaml, parameters, parameter_server_mode, train_config, frequency, master_optimizer, master_loss, master_metrics, custom_objects): - # TODO handle custom_objects - self.model = dict_to_model(serialized_model) + if parameter_server_mode == 'http': self.client = HttpClient() elif parameter_server_mode == 'socket': @@ -63,6 +61,11 @@ def __init__(self, serialized_model, parameter_server_mode, train_config, freque self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics + self.yaml = yaml + self.parameters = parameters + self.custom_objects = custom_objects + self.model = None + def train(self, data_iterator): """Train a keras model on a worker and send asynchronous updates @@ -75,7 +78,9 @@ def train(self, data_iterator): if x_train.size == 0: return + self.model = model_from_yaml(self.yaml, self.custom_objects) self.model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + self.model.set_weights(self.parameters.value) nb_epoch = self.train_config['nb_epoch'] batch_size = self.train_config.get('batch_size') diff --git a/examples/mnist_mlp_spark.py b/examples/mnist_mlp_spark.py index 0e86a4d..fad0cde 100644 --- a/examples/mnist_mlp_spark.py +++ b/examples/mnist_mlp_spark.py @@ -16,7 +16,7 @@ # Define basic parameters batch_size = 64 nb_classes = 10 -nb_epoch = 10 +epochs = 10 # Create Spark context conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') @@ -62,7 +62,7 @@ num_workers=2,master_optimizer=sgd) # Train Spark model -spark_model.train(rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=2, validation_split=0.1) +spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) # Evaluate Spark model by evaluating the underlying model score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) From 6b9eca40fca065cd84f03fcb111af9b2276b8556 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 15:36:31 +0200 Subject: [PATCH 38/57] fix async test --- elephas/ml_model.py | 4 +- elephas/utils/sockets.py | 4 +- elephas/worker.py | 4 +- tests/test_spark_model.py | 142 +++++++++++++++++++++++++------------- 4 files changed, 98 insertions(+), 56 deletions(-) diff --git a/elephas/ml_model.py b/elephas/ml_model.py index bf2d9d6..afedcc4 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -62,8 +62,8 @@ def _fit(self, df): spark_model = SparkModel(keras_model, optimizer=optimizer, mode=self.get_mode(), frequency=self.get_frequency(), num_workers=self.get_num_workers()) - spark_model.train(simple_rdd, nb_epoch=self.get_nb_epoch(), 
batch_size=self.get_batch_size(), - verbose=self.get_verbosity(), validation_split=self.get_validation_split()) + spark_model.fit(simple_rdd, epochs=self.get_nb_epoch(), batch_size=self.get_batch_size(), + verbose=self.get_verbosity(), validation_split=self.get_validation_split()) model_weights = spark_model.master_network.get_weights() weights = simple_rdd.ctx.broadcast(model_weights) diff --git a/elephas/utils/sockets.py b/elephas/utils/sockets.py index abee72f..dd15ebd 100644 --- a/elephas/utils/sockets.py +++ b/elephas/utils/sockets.py @@ -2,14 +2,14 @@ from socket import gethostbyname, gethostname -def determine_master(port=':4000'): +def determine_master(port=4000): """Determine address of master so that workers can connect to it. :param port: port on which the application runs :return: Master address """ - return str(gethostbyname(gethostname())) + str(port) + return gethostbyname(gethostname()) + ":" + str(port) def _receive_all(socket, num_bytes): diff --git a/elephas/worker.py b/elephas/worker.py index ef4b7cf..cf28b41 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -55,10 +55,9 @@ def __init__(self, yaml, parameters, parameter_server_mode, train_config, freque raise ValueError("Parameter server mode has to be either `http` or `socket`, " "got {}".format(parameter_server_mode)) - self.client = parameter_server_mode self.train_config = train_config self.frequency = frequency - self.master_optimizer = master_optimizer + self.master_optimizer = "sgd" # TODO master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics self.yaml = yaml @@ -66,7 +65,6 @@ def __init__(self, yaml, parameters, parameter_server_mode, train_config, freque self.custom_objects = custom_objects self.model = None - def train(self, data_iterator): """Train a keras model on a worker and send asynchronous updates to parameter server diff --git a/tests/test_spark_model.py b/tests/test_spark_model.py index ecc52dc..61313a3 100644 --- a/tests/test_spark_model.py +++ b/tests/test_spark_model.py @@ -13,60 +13,104 @@ from pyspark import SparkContext, SparkConf +# Define basic parameters +batch_size = 64 +nb_classes = 10 +epochs = 1 -def test_spark_model(): - # Define basic parameters - batch_size = 64 - nb_classes = 10 - epochs = 1 - - # Create Spark context - conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') - sc = SparkContext(conf=conf) - - # Load data - (x_train, y_train), (x_test, y_test) = mnist.load_data() - - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype("float32") - x_test = x_test.astype("float32") - x_train /= 255 - x_test /= 255 - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - - # Convert class vectors to binary class matrices - y_train = np_utils.to_categorical(y_train, nb_classes) - y_test = np_utils.to_categorical(y_test, nb_classes) - - model = Sequential() - model.add(Dense(128, input_dim=784)) - model.add(Activation('relu')) - model.add(Dropout(0.2)) - model.add(Dense(128)) - model.add(Activation('relu')) - model.add(Dropout(0.2)) - model.add(Dense(10)) - model.add(Activation('softmax')) - - sgd = SGD(lr=0.1) - - # Build RDD from numpy features and labels +# Create Spark context +conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') +sc = SparkContext(conf=conf) + +# Load data +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +x_train = x_train.reshape(60000, 784) +x_test = x_test.reshape(10000, 784) +x_train = 
x_train.astype("float32") +x_test = x_test.astype("float32") +x_train /= 255 +x_test /= 255 +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') + +# Convert class vectors to binary class matrices +y_train = np_utils.to_categorical(y_train, nb_classes) +y_test = np_utils.to_categorical(y_test, nb_classes) + +model = Sequential() +model.add(Dense(128, input_dim=784)) +model.add(Activation('relu')) +model.add(Dropout(0.2)) +model.add(Dense(128)) +model.add(Activation('relu')) +model.add(Dropout(0.2)) +model.add(Dense(10)) +model.add(Activation('softmax')) + +sgd = SGD(lr=0.1) + + +def test_spark_model_synchronous_epoch(): + rdd = to_simple_rdd(sc, x_train, y_train) + + adagrad = elephas_optimizers.Adagrad() + spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', + mode='synchronous', num_workers=2, master_optimizer=sgd) + + spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) + + score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) + print('Test accuracy:', score[1]) + + +def test_spark_model_synchronous_batch(): rdd = to_simple_rdd(sc, x_train, y_train) - # Initialize SparkModel from Keras model and Spark context adagrad = elephas_optimizers.Adagrad() - spark_model = SparkModel(model, - optimizer=adagrad, - frequency='epoch', - mode='synchronous', - num_workers=2, - master_optimizer=sgd) - - # Train Spark model + spark_model = SparkModel(model, optimizer=adagrad, frequency='batch', + mode='synchronous', num_workers=2, master_optimizer=sgd) + spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) - # Evaluate Spark model by evaluating the underlying model score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) + + +def test_spark_model_asynchronous_epoch(): + rdd = to_simple_rdd(sc, x_train, y_train) + + adagrad = elephas_optimizers.Adagrad() + spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', + mode='asynchronous', num_workers=2, master_optimizer=sgd) + + spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) + + score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) + print('Test accuracy:', score[1]) + + +def test_spark_model_asynchronous_batch(): + rdd = to_simple_rdd(sc, x_train, y_train) + + adagrad = elephas_optimizers.Adagrad() + spark_model = SparkModel(model, optimizer=adagrad, frequency='batch', + mode='asynchronous', num_workers=2, master_optimizer=sgd) + + spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) + + score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) + print('Test accuracy:', score[1]) + + +def test_spark_model_hogwild_epoch(): + rdd = to_simple_rdd(sc, x_train, y_train) + + adagrad = elephas_optimizers.Adagrad() + spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', + mode='hogwild', num_workers=2, master_optimizer=sgd) + + spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) + + score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) + print('Test accuracy:', score[1]) \ No newline at end of file From 22ae3aeac558f80c91ed0141faf57d9f635c7d04 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 15:56:10 +0200 Subject: [PATCH 39/57] clean --- elephas/spark_model.py | 27 ++++++++------------- tests/test_spark_model.py | 51 ++++++++------------------------------- 2 files 
changed, 20 insertions(+), 58 deletions(-) diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 96d85c9..dcfd897 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -67,25 +67,18 @@ def __init__(self, master_network, optimizer=None, mode='asynchronous', frequenc "got {}".format(self.parameter_server_mode)) @staticmethod - def get_train_config(nb_epoch, batch_size, - verbose, validation_split): - """Get configuration of training parameters - """ - train_config = {'nb_epoch': nb_epoch, - 'batch_size': batch_size, - 'verbose': verbose, - 'validation_split': validation_split} - return train_config + def get_train_config(nb_epoch, batch_size, verbose, validation_split): + return {'nb_epoch': nb_epoch, + 'batch_size': batch_size, + 'verbose': verbose, + 'validation_split': validation_split} def get_config(self): - """Get configuration of model parameters - """ - model_config = {'model': self.master_network.get_config(), - 'optimizer': self.optimizer.get_config(), - 'mode': self.mode, - 'frequency': self.frequency, - 'num_workers': self.num_workers} - return model_config + return {'model': self.master_network.get_config(), + 'optimizer': self.optimizer.get_config(), + 'mode': self.mode, + 'frequency': self.frequency, + 'num_workers': self.num_workers} @property def master_network(self): diff --git a/tests/test_spark_model.py b/tests/test_spark_model.py index 61313a3..0090f34 100644 --- a/tests/test_spark_model.py +++ b/tests/test_spark_model.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from __future__ import print_function +import pytest from keras.datasets import mnist from keras.models import Sequential @@ -11,7 +12,6 @@ from elephas.utils.rdd_utils import to_simple_rdd from elephas import optimizers as elephas_optimizers -from pyspark import SparkContext, SparkConf # Define basic parameters batch_size = 64 @@ -19,8 +19,8 @@ epochs = 1 # Create Spark context -conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') -sc = SparkContext(conf=conf) +pytest.mark.usefixtures("spark_context") + # Load data (x_train, y_train), (x_test, y_test) = mnist.load_data() @@ -51,66 +51,35 @@ sgd = SGD(lr=0.1) -def test_spark_model_synchronous_epoch(): - rdd = to_simple_rdd(sc, x_train, y_train) +def test_spark_model_end_to_end(spark_context): + rdd = to_simple_rdd(spark_context, x_train, y_train) adagrad = elephas_optimizers.Adagrad() + + # sync epoch spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', mode='synchronous', num_workers=2, master_optimizer=sgd) - spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) - score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) - -def test_spark_model_synchronous_batch(): - rdd = to_simple_rdd(sc, x_train, y_train) - - adagrad = elephas_optimizers.Adagrad() + # sync batch spark_model = SparkModel(model, optimizer=adagrad, frequency='batch', mode='synchronous', num_workers=2, master_optimizer=sgd) - spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) - score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) - -def test_spark_model_asynchronous_epoch(): - rdd = to_simple_rdd(sc, x_train, y_train) - - adagrad = elephas_optimizers.Adagrad() + # async epoch spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=2, master_optimizer=sgd) - - spark_model.fit(rdd, epochs=epochs, 
batch_size=batch_size, verbose=2, validation_split=0.1) - - score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) - print('Test accuracy:', score[1]) - - -def test_spark_model_asynchronous_batch(): - rdd = to_simple_rdd(sc, x_train, y_train) - - adagrad = elephas_optimizers.Adagrad() - spark_model = SparkModel(model, optimizer=adagrad, frequency='batch', - mode='asynchronous', num_workers=2, master_optimizer=sgd) - spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) - score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) - -def test_spark_model_hogwild_epoch(): - rdd = to_simple_rdd(sc, x_train, y_train) - - adagrad = elephas_optimizers.Adagrad() + # hogwild epoch spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', mode='hogwild', num_workers=2, master_optimizer=sgd) - spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) - score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) \ No newline at end of file From 09f1f481056bad8d47ecf4fdda4a7f737325c77f Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 17:16:20 +0200 Subject: [PATCH 40/57] requirements --- requirements.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2f5a128 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +Flask==1.0.2 +hyperas==0.4 +Keras==2.1.3 +Keras-Applications==1.0.4 +Keras-Preprocessing==1.0.2 +numpy==1.14.5 +pyspark==2.3.1 +six==1.11.0 +tensorflow==1.10.0 +networkx==1.1 \ No newline at end of file From 30f5da126116576fa25fab7f538694c25fd3c667 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 17:16:51 +0200 Subject: [PATCH 41/57] simpler docker file --- Dockerfile | 109 ++++++++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 55 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7f822ff..61c3358 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,55 +1,54 @@ -# Extension of the Jupyter Notebooks -# Distributed under the terms of the Modified BSD / MIT License. -FROM jupyter/scipy-notebook - -MAINTAINER Elephas Project - -USER root - -# Spark dependencies -ENV APACHE_SPARK_VERSION 2.0.1 -ENV PYJ_VERSION py4j-0.10.1-src.zip -RUN apt-get -y update && \ - apt-get install -y --no-install-recommends openjdk-7-jre-headless && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* -RUN cd /tmp && \ - wget -q http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz && \ - tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz -C /usr/local && \ - rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz -RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6 spark - -# Mesos dependencies -# Currently, Mesos is not available from Debian Jessie. -# So, we are installing it from Debian Wheezy. Once it -# becomes available for Debian Jessie. We should switch -# over to using that instead. 
-RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF && \ - DISTRO=debian && \ - CODENAME=wheezy && \ - echo "deb http://repos.mesosphere.io/${DISTRO} ${CODENAME} main" > /etc/apt/sources.list.d/mesosphere.list && \ - apt-get -y update && \ - apt-get --no-install-recommends -y --force-yes install mesos=0.22.1-1.0.debian78 && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# additional libraries for Keras and Elephas -# RUN apt-get --no-install-recommends -y --force-yes install liblapack-dev libblas-dev gfortran - -# Spark and Mesos config -ENV SPARK_HOME /usr/local/spark -ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/$PYJ_LIB_VERSION -ENV MESOS_NATIVE_LIBRARY /usr/local/lib/libmesos.so -ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info - -USER $NB_USER - -# Install Python 3 Tensorflow -RUN conda install --quiet --yes 'tensorflow=0.9.0' -# Keras -RUN conda install --channel https://conda.anaconda.org/KEHANG --quiet --yes 'keras=1.0.8' -# Use the latest version of hyperopts (python 3.5 compatibility) -RUN pip install https://github.com/hyperopt/hyperopt/archive/master.zip -# Elephas for distributed spark -RUN pip install elephas -RUN pip install py4j +FROM gw000/keras:2.1.3-py3-tf-gpu +MAINTAINER gw0 [http://gw.tnode.com/] + +# install py3-tf-cpu/gpu (Python 3, TensorFlow, CPU/GPU) +RUN apt-get update -qq \ + && apt-get install --no-install-recommends -y \ + # install python 3 + python3 \ + python3-dev \ + python3-pip \ + python3-setuptools \ + python3-virtualenv \ + pkg-config \ + # requirements for numpy + libopenblas-base \ + python3-numpy \ + python3-scipy \ + # requirements for keras + python3-h5py \ + python3-yaml \ + python3-pydot \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG TENSORFLOW_VERSION=1.10.0 +ARG TENSORFLOW_DEVICE=gpu +ARG TENSORFLOW_APPEND=_gpu +RUN pip3 --no-cache-dir install https://storage.googleapis.com/tensorflow/linux/${TENSORFLOW_DEVICE}/tensorflow${TENSORFLOW_APPEND}-${TENSORFLOW_VERSION}-cp35-cp35m-linux_x86_64.whl + +ARG KERAS_VERSION=2.1.3 +ENV KERAS_BACKEND=tensorflow +RUN pip3 --no-cache-dir install git+https://github.com/fchollet/keras.git@${KERAS_VERSION} + +# install additional debian packages +RUN apt-get update -qq \ + && apt-get install --no-install-recommends -y \ + # system tools + less \ + procps \ + vim-tiny \ + # build dependencies + build-essential \ + libffi-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +RUN mkdir -p app +WORKDIR /app +COPY ./requirements.txt /app + +# Install requirements +RUN pip3 install -r ./requirements.txt +RUN pip3 install git+https://github.com/hyperopt/hyperopt.git \ No newline at end of file From c90cf88b8731b47c7a5cb23ad442aa15f49b3968 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 17:17:07 +0200 Subject: [PATCH 42/57] just install elephas --- README.md | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/README.md b/README.md index b8c58d7..cb760cd 100644 --- a/README.md +++ b/README.md @@ -42,36 +42,11 @@ Elephas implements a class of data-parallel algorithms on top of Keras, using Sp ## Getting started ### Installation -Install elephas from PyPI with +Install elephas from PyPI with, Spark will be installed through `pyspark` for you. ``` pip install elephas ``` -Depending on what OS you are using, you may need to install some prerequisite modules (LAPACK, BLAS, fortran compiler) first. 
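With the platform-specific setup removed here, installation reduces to the plain `pip install elephas` kept above; a sketch of the import surface that install is expected to expose, using only module paths that appear in the examples and tests of this series:

```
# sketch: entry points exercised throughout these patches
from elephas.spark_model import SparkModel, SparkMLlibModel
from elephas.ml_model import ElephasEstimator
from elephas.utils.rdd_utils import to_simple_rdd, to_labeled_point
```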
-For example, on Ubuntu Linux: -``` -sudo apt-get install liblapack-dev libblas-dev gfortran -``` - -A quick way to install Spark locally is to use homebrew on Mac -``` -brew install spark -``` -or linuxbrew on linux. -``` -brew install apache-spark -``` -The brew version of Spark may be outdated at times. To build from source, simply follow the instructions at the [Spark download section](http://spark.apache.org/downloads.html) or use the following commands. -``` -wget http://apache.mirrors.tds.net/spark/spark-1.5.2/spark-1.5.2-bin-hadoop2.6.tgz -P ~ -sudo tar zxvf ~/spark-* -C /usr/local -sudo mv /usr/local/spark-* /usr/local/spark -``` -After that, make sure to put these path variables to your shell profile (e.g. `~/.zshrc`): -``` -export SPARK_HOME=/usr/local/spark -export PATH=$PATH:$SPARK_HOME/bin -``` ### Using Docker From bdeaab71e80fc03ecb7e7fc4f2ae23e957ddd73a Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 17:17:16 +0200 Subject: [PATCH 43/57] hyperparam test --- elephas/hyperparam.py | 4 +- examples/hyperparam_optimization.py | 8 +-- tests/test_hyperparam.py | 92 ++++++++++++++--------------- 3 files changed, 49 insertions(+), 55 deletions(-) diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index 308f3b0..fdeb455 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -23,9 +23,7 @@ def __init__(self, sc, num_workers=4): def compute_trials(self, model, data, max_evals): model_string = get_hyperopt_model_string(model=model, data=data, functions=None, notebook_name=None, verbose=False, stack=3) - # bc_model = self.spark_context.broadcast(model_string) - # bc_max_evals = self.spark_context.broadcast(max_evals) - + print(model_string) hyperas_worker = HyperasWorker(model_string, max_evals) dummy_rdd = self.spark_context.parallelize([i for i in range(1, 1000)]) dummy_rdd = dummy_rdd.repartition(self.num_workers) diff --git a/examples/hyperparam_optimization.py b/examples/hyperparam_optimization.py index bc0a245..d8a981f 100644 --- a/examples/hyperparam_optimization.py +++ b/examples/hyperparam_optimization.py @@ -2,6 +2,7 @@ from hyperopt import STATUS_OK from hyperas.distributions import choice, uniform +import six.moves.cPickle as pickle from elephas.hyperparam import HyperParamModel @@ -53,15 +54,14 @@ def model(X_train, Y_train, X_test, Y_test): model.add(Activation('softmax')) rms = RMSprop() - model.compile(loss='categorical_crossentropy', optimizer=rms) + model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['acc']) model.fit(X_train, Y_train, batch_size={{choice([64, 128])}}, - nb_epoch=1, - show_accuracy=True, + epochs=1, verbose=2, validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) + score, acc = model.evaluate(X_test, Y_test, verbose=0) print('Test accuracy:', acc) return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), 'weights': pickle.dumps(model.get_weights())} diff --git a/tests/test_hyperparam.py b/tests/test_hyperparam.py index 0387826..1a740ba 100644 --- a/tests/test_hyperparam.py +++ b/tests/test_hyperparam.py @@ -1,4 +1,3 @@ -import numpy as np import pytest from hyperopt import STATUS_OK from hyperas.distributions import choice, uniform @@ -9,54 +8,51 @@ pytest.mark.usefixtures("spark_context") -def test_that_requires_sc(spark_context): - assert spark_context.parallelize(np.zeros((10, 10))).count() == 10 +def data(): + from keras.datasets import mnist + from keras.utils import np_utils + (X_train, y_train), (X_test, y_test) = 
mnist.load_data() + X_train = X_train.reshape(60000, 784) + X_test = X_test.reshape(10000, 784) + X_train = X_train.astype('float32') + X_test = X_test.astype('float32') + X_train /= 255 + X_test /= 255 + nb_classes = 10 + Y_train = np_utils.to_categorical(y_train, nb_classes) + Y_test = np_utils.to_categorical(y_test, nb_classes) + return X_train, Y_train, X_test, Y_test + + +def model(X_train, Y_train, X_test, Y_test): + from keras.models import Sequential + from keras.layers.core import Dense, Dropout, Activation + from keras.optimizers import RMSprop + + model = Sequential() + model.add(Dense(512, input_shape=(784,))) + model.add(Activation('relu')) + model.add(Dropout({{uniform(0, 1)}})) + model.add(Dense({{choice([256, 512, 1024])}})) + model.add(Activation('relu')) + model.add(Dropout({{uniform(0, 1)}})) + model.add(Dense(10)) + model.add(Activation('softmax')) + + rms = RMSprop() + model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['acc']) + + model.fit(X_train, Y_train, + batch_size={{choice([64, 128])}}, + epochs=1, + verbose=2, + validation_data=(X_test, Y_test)) + score, acc = model.evaluate(X_test, Y_test, verbose=0) + print('Test accuracy:', acc) + return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), + 'weights': pickle.dumps(model.get_weights())} def test_hyper_param_model(spark_context): - def data(): - from keras.datasets import mnist - from keras.utils import np_utils - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - def model(X_train, Y_train, X_test, Y_test): - from keras.models import Sequential - from keras.layers.core import Dense, Dropout, Activation - from keras.optimizers import RMSprop - - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([256, 512, 1024])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', optimizer=rms) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - nb_epoch=1, - show_accuracy=True, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), - 'weights': pickle.dumps(model.get_weights())} - hyperparam_model = HyperParamModel(spark_context) - hyperparam_model.minimize(model=model, data=data, max_evals=5) + hyperparam_model.minimize(model=model, data=data, max_evals=1) From 2cd54d64439ddbbff9ab66ff9ead63cbeb73d674 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 17:24:14 +0200 Subject: [PATCH 44/57] polish --- elephas/hyperparam.py | 1 - examples/hyperparam_optimization.py | 58 +++++++++++++++-------------- examples/ml_pipeline_otto.py | 12 +++--- examples/mnist_mlp_spark.py | 7 +--- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/elephas/hyperparam.py b/elephas/hyperparam.py index fdeb455..356e735 100644 --- a/elephas/hyperparam.py +++ b/elephas/hyperparam.py @@ -23,7 +23,6 @@ def 
__init__(self, sc, num_workers=4): def compute_trials(self, model, data, max_evals): model_string = get_hyperopt_model_string(model=model, data=data, functions=None, notebook_name=None, verbose=False, stack=3) - print(model_string) hyperas_worker = HyperasWorker(model_string, max_evals) dummy_rdd = self.spark_context.parallelize([i for i in range(1, 1000)]) dummy_rdd = dummy_rdd.repartition(self.num_workers) diff --git a/examples/hyperparam_optimization.py b/examples/hyperparam_optimization.py index d8a981f..b94d133 100644 --- a/examples/hyperparam_optimization.py +++ b/examples/hyperparam_optimization.py @@ -16,20 +16,20 @@ def data(): """ from keras.datasets import mnist from keras.utils import np_utils - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 + (x_train, y_train), (x_test, y_test) = mnist.load_data() + x_train = x_train.reshape(60000, 784) + x_test = x_test.reshape(10000, 784) + x_train = x_train.astype('float32') + x_test = x_test.astype('float32') + x_train /= 255 + x_test /= 255 nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test + y_train = np_utils.to_categorical(y_train, nb_classes) + y_test = np_utils.to_categorical(y_test, nb_classes) + return x_train, y_train, x_test, y_test -def model(X_train, Y_train, X_test, Y_test): +def model(x_train, y_train, x_test, y_test): """Model providing function: Create Keras model with double curly brackets dropped-in as needed. @@ -43,27 +43,29 @@ def model(X_train, Y_train, X_test, Y_test): from keras.layers.core import Dense, Dropout, Activation from keras.optimizers import RMSprop - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([256, 512, 1024])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) + keras_model = Sequential() + keras_model.add(Dense(512, input_shape=(784,))) + keras_model.add(Activation('relu')) + keras_model.add(Dropout({{uniform(0, 1)}})) + keras_model.add(Dense({{choice([256, 512, 1024])}})) + keras_model.add(Activation('relu')) + keras_model.add(Dropout({{uniform(0, 1)}})) + keras_model.add(Dense(10)) + keras_model.add(Activation('softmax')) rms = RMSprop() - model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['acc']) + keras_model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['acc']) - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - epochs=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) + keras_model.fit(x_train, y_train, + batch_size={{choice([64, 128])}}, + epochs=1, + verbose=2, + validation_data=(x_test, y_test)) + score, acc = keras_model.evaluate(x_test, y_test, verbose=0) print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), 'weights': pickle.dumps(model.get_weights())} + return {'loss': -acc, 'status': STATUS_OK, 'model': keras_model.to_yaml(), + 'weights': pickle.dumps(keras_model.get_weights())} + # Create Spark context conf = SparkConf().setAppName('Elephas_Hyperparameter_Optimization').setMaster('local[8]') diff --git 
a/examples/ml_pipeline_otto.py b/examples/ml_pipeline_otto.py index 013740a..cb18cb2 100644 --- a/examples/ml_pipeline_otto.py +++ b/examples/ml_pipeline_otto.py @@ -1,6 +1,6 @@ from __future__ import print_function - from __future__ import absolute_import + from pyspark.mllib.linalg import Vectors import numpy as np import random @@ -23,25 +23,28 @@ sc = SparkContext(conf=conf) sql_context = SQLContext(sc) + # Data loader def shuffle_csv(csv_file): lines = open(csv_file).readlines() random.shuffle(lines) open(csv_file, 'w').writelines(lines) + def load_data_rdd(csv_file, shuffle=True, train=True): if shuffle: shuffle_csv(csv_file) data = sc.textFile(data_path + csv_file) - data = data.filter(lambda x:x.split(',')[0] != 'id').map(lambda line: line.split(',')) + data = data.filter(lambda x: x.split(',')[0] != 'id').map(lambda line: line.split(',')) if train: data = data.map( lambda line: (Vectors.dense(np.asarray(line[1:-1]).astype(np.float32)), - str(line[-1]).replace('Class_', '')) ) + str(line[-1]).replace('Class_', ''))) else: - data = data.map(lambda line: (Vectors.dense(np.asarray(line[1:]).astype(np.float32)), "1") ) + data = data.map(lambda line: (Vectors.dense(np.asarray(line[1:]).astype(np.float32)), "1")) return data + # Define Data frames train_df = sql_context.createDataFrame(load_data_rdd("train.csv"), ['features', 'category']) test_df = sql_context.createDataFrame(load_data_rdd("test.csv", shuffle=False, train=False), ['features', 'category']) @@ -91,7 +94,6 @@ def load_data_rdd(csv_file, shuffle=True, train=True): fitted_pipeline = pipeline.fit(train_df) # Evaluate Spark model - prediction = fitted_pipeline.transform(train_df) pnl = prediction.select("index_category", "prediction") pnl.show(100) diff --git a/examples/mnist_mlp_spark.py b/examples/mnist_mlp_spark.py index fad0cde..3462de2 100644 --- a/examples/mnist_mlp_spark.py +++ b/examples/mnist_mlp_spark.py @@ -55,11 +55,8 @@ # Initialize SparkModel from Keras model and Spark context adagrad = elephas_optimizers.Adagrad() -spark_model = SparkModel(model, - optimizer=adagrad, - frequency='epoch', - mode='asynchronous', - num_workers=2,master_optimizer=sgd) +spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', + mode='asynchronous', num_workers=2, master_optimizer=sgd) # Train Spark model spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) From 5c8fcc2baf0d198cbd8c46b4c60f1c2728b7ab28 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Tue, 14 Aug 2018 17:45:28 +0200 Subject: [PATCH 45/57] mllib test --- elephas/spark_model.py | 4 +-- examples/mllib_mlp.py | 14 ++++----- requirements.txt | 2 +- tests/test_mllib_model.py | 63 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 tests/test_mllib_model.py diff --git a/elephas/spark_model.py b/elephas/spark_model.py index dcfd897..ddbf75d 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -191,13 +191,13 @@ def __init__(self, master_network, optimizer=None, mode='asynchronous', frequenc master_metrics=master_metrics, custom_objects=custom_objects, parameter_server_mode=parameter_server_mode, *args, **kwargs) - def train(self, labeled_points, nb_epoch=10, batch_size=32, verbose=0, validation_split=0.1, + def fit(self, labeled_points, epochs=10, batch_size=32, verbose=0, validation_split=0.1, categorical=False, nb_classes=None): """Train an elephas model on an RDD of LabeledPoints """ rdd = lp_to_simple_rdd(labeled_points, categorical, nb_classes) rdd 
= rdd.repartition(self.num_workers) - self._fit(rdd, nb_epoch, batch_size, verbose, validation_split) + self._fit(rdd=rdd, epochs=epochs, batch_size=batch_size, verbose=verbose, validation_split=validation_split) def predict(self, mllib_data): """Predict probabilities for an RDD of features diff --git a/examples/mllib_mlp.py b/examples/mllib_mlp.py index 78c93f3..18976b1 100644 --- a/examples/mllib_mlp.py +++ b/examples/mllib_mlp.py @@ -8,15 +8,14 @@ from keras.utils import np_utils from elephas.spark_model import SparkMLlibModel -from elephas.utils.rdd_utils import to_labeled_point, lp_to_simple_rdd -from elephas import optimizers as elephas_optimizers +from elephas.utils.rdd_utils import to_labeled_point from pyspark import SparkContext, SparkConf # Define basic parameters batch_size = 64 nb_classes = 10 -nb_epoch = 3 +epochs = 3 # Load data (x_train, y_train), (x_test, y_test) = mnist.load_data() @@ -53,15 +52,14 @@ # Build RDD from numpy features and labels lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True) -rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes) # Initialize SparkModel from Keras model and Spark context -adadelta = elephas_optimizers.Adadelta() -spark_model = SparkMLlibModel(sc, model, optimizer=adadelta, frequency='batch', mode='asynchronous', num_workers=2, master_optimizer=rms) +spark_model = SparkMLlibModel(master_network=model, frequency='epoch', mode='synchronous', + master_metrics=['acc']) # Train Spark model -spark_model.train(lp_rdd, nb_epoch=20, batch_size=32, verbose=0, - validation_split=0.1, categorical=True, nb_classes=nb_classes) +spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0, + validation_split=0.1, categorical=True, nb_classes=nb_classes) # Evaluate Spark model by evaluating the underlying model score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) diff --git a/requirements.txt b/requirements.txt index 2f5a128..73e17db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ Flask==1.0.2 hyperas==0.4 -Keras==2.1.3 +Keras==2.2.2 Keras-Applications==1.0.4 Keras-Preprocessing==1.0.2 numpy==1.14.5 diff --git a/tests/test_mllib_model.py b/tests/test_mllib_model.py new file mode 100644 index 0000000..26bb417 --- /dev/null +++ b/tests/test_mllib_model.py @@ -0,0 +1,63 @@ +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Activation +from keras.optimizers import RMSprop +from keras.utils import np_utils + +from elephas.spark_model import SparkMLlibModel +from elephas.utils.rdd_utils import to_labeled_point, lp_to_simple_rdd +from elephas import optimizers as elephas_optimizers + +import pytest +pytest.mark.usefixtures("spark_context") + +# Define basic parameters +batch_size = 64 +nb_classes = 10 +epochs = 3 + +# Load data +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +x_train = x_train.reshape(60000, 784)[:1000] +x_test = x_test.reshape(10000, 784) +x_train = x_train.astype("float32") +x_test = x_test.astype("float32") +x_train /= 255 +x_test /= 255 +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') + +# Convert class vectors to binary class matrices +y_train = np_utils.to_categorical(y_train, nb_classes) +y_test = np_utils.to_categorical(y_test, nb_classes) + +model = Sequential() +model.add(Dense(128, input_dim=784)) +model.add(Activation('relu')) +model.add(Dropout(0.2)) +model.add(Dense(128)) +model.add(Activation('relu')) +model.add(Dropout(0.2)) +model.add(Dense(10)) 
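Under the hood, the renamed `SparkMLlibModel.fit` from the hunk above converts the LabeledPoint RDD back into a plain (features, label) RDD before delegating to the shared training path; a sketch, assuming `lp_rdd` was built with `to_labeled_point` and `num_workers` mirrors the model's setting:

```
# sketch: what SparkMLlibModel.fit does with a LabeledPoint RDD (cf. spark_model.py above)
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)   # categorical=True
rdd = rdd.repartition(num_workers)
# ...then the common _fit(rdd, epochs, batch_size, verbose, validation_split) runs
```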
+model.add(Activation('softmax')) + +# Compile model +rms = RMSprop() + + +def test_mllib_model(spark_context): + # Build RDD from numpy features and labels + lp_rdd = to_labeled_point(spark_context, x_train, y_train, categorical=True) + + # Initialize SparkModel from Keras model and Spark context + spark_model = SparkMLlibModel(master_network=model, frequency='epoch', mode='synchronous', + master_metrics=['acc']) + + # Train Spark model + spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0, + validation_split=0.1, categorical=True, nb_classes=nb_classes) + + # Evaluate Spark model by evaluating the underlying model + score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) + print('Test accuracy:', score[1]) From 1068dc55cfcf99a406d7ee7dcc8fea4ed2fdd062 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:10:01 +0200 Subject: [PATCH 46/57] data to git ignore --- .gitignore | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 001e228..1592491 100644 --- a/.gitignore +++ b/.gitignore @@ -65,5 +65,7 @@ examples/*.csv .pytest_cache -test_env/ -venv/ \ No newline at end of file +venv/ + +train.csv +test.csv \ No newline at end of file From 46e2a63ec4e6c67f9d88d49c2da0c6b316632e20 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:10:19 +0200 Subject: [PATCH 47/57] all tests run --- tests/ml/test_params.py | 9 +++- tests/test_ml_model.py | 86 ++++++++++++++++++++++++++++++++++++++- tests/test_mllib_model.py | 7 ++-- tests/test_spark_model.py | 16 +++----- 4 files changed, 101 insertions(+), 17 deletions(-) diff --git a/tests/ml/test_params.py b/tests/ml/test_params.py index e9baecb..0b13284 100644 --- a/tests/ml/test_params.py +++ b/tests/ml/test_params.py @@ -8,8 +8,15 @@ def test_has_keras_model_config(): assert conf == param.get_keras_model_config() +def test_has_elephas_optimizer_config(): + param = HasElephasOptimizerConfig() + conf = {"foo": "bar"} + param.set_elephas_optimizer_config(conf) + assert conf == param.get_elephas_optimizer_config() + + def test_has_optimizer_config(): - param = HasOptimizerConfig() + param = HasKerasOptimizerConfig() conf = {"foo": "bar"} param.set_optimizer_config(conf) assert conf == param.get_optimizer_config() diff --git a/tests/test_ml_model.py b/tests/test_ml_model.py index 1d7d70a..9cd4b5a 100644 --- a/tests/test_ml_model.py +++ b/tests/test_ml_model.py @@ -1 +1,85 @@ -# TODO test basic ml model \ No newline at end of file +from __future__ import absolute_import +from __future__ import print_function + +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Activation +from keras.utils import np_utils +from keras import optimizers + +from elephas.ml_model import ElephasEstimator +from elephas.ml.adapter import to_data_frame + +from pyspark import SparkContext, SparkConf +from pyspark.mllib.evaluation import MulticlassMetrics +from pyspark.ml import Pipeline + +import pytest +pytest.mark.usefixtures("spark_context") + +# Define basic parameters +batch_size = 64 +nb_classes = 10 +nb_epoch = 1 + +# Load data +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +x_train = x_train.reshape(60000, 784)[:1000] +x_test = x_test.reshape(10000, 784) +x_train = x_train.astype("float32") +x_test = x_test.astype("float32") +x_train /= 255 +x_test /= 255 +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') + +# Convert class vectors to binary class matrices +y_train = 
np_utils.to_categorical(y_train, nb_classes) +y_test = np_utils.to_categorical(y_test, nb_classes) + +model = Sequential() +model.add(Dense(128, input_dim=784)) +model.add(Activation('relu')) +model.add(Dropout(0.2)) +model.add(Dense(128)) +model.add(Activation('relu')) +model.add(Dropout(0.2)) +model.add(Dense(10)) +model.add(Activation('softmax')) + + +def test_spark_ml_model(spark_context): + + df = to_data_frame(spark_context, x_train, y_train, categorical=True) + test_df = to_data_frame(spark_context, x_test, y_test, categorical=True) + + sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) + sgd_conf = optimizers.serialize(sgd) + + # Initialize Spark ML Estimator + estimator = ElephasEstimator() + estimator.set_keras_model_config(model.to_yaml()) + estimator.set_optimizer_config(sgd_conf) + estimator.set_mode("synchronous") + estimator.set_loss("categorical_crossentropy") + estimator.set_metrics(['acc']) + estimator.set_nb_epoch(nb_epoch) + estimator.set_batch_size(batch_size) + estimator.set_validation_split(0.1) + estimator.set_categorical_labels(True) + estimator.set_nb_classes(nb_classes) + + # Fitting a model returns a Transformer + pipeline = Pipeline(stages=[estimator]) + fitted_pipeline = pipeline.fit(df) + + # Evaluate Spark model by evaluating the underlying model + prediction = fitted_pipeline.transform(test_df) + pnl = prediction.select("label", "prediction") + pnl.show(100) + + prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction)) + metrics = MulticlassMetrics(prediction_and_label) + print(metrics.precision()) + print(metrics.recall()) diff --git a/tests/test_mllib_model.py b/tests/test_mllib_model.py index 26bb417..8a142bb 100644 --- a/tests/test_mllib_model.py +++ b/tests/test_mllib_model.py @@ -5,8 +5,7 @@ from keras.utils import np_utils from elephas.spark_model import SparkMLlibModel -from elephas.utils.rdd_utils import to_labeled_point, lp_to_simple_rdd -from elephas import optimizers as elephas_optimizers +from elephas.utils.rdd_utils import to_labeled_point import pytest pytest.mark.usefixtures("spark_context") @@ -44,6 +43,7 @@ # Compile model rms = RMSprop() +model.compile(rms, 'categorical_crossentropy', ['acc']) def test_mllib_model(spark_context): @@ -51,8 +51,7 @@ def test_mllib_model(spark_context): lp_rdd = to_labeled_point(spark_context, x_train, y_train, categorical=True) # Initialize SparkModel from Keras model and Spark context - spark_model = SparkMLlibModel(master_network=model, frequency='epoch', mode='synchronous', - master_metrics=['acc']) + spark_model = SparkMLlibModel(model=model, frequency='epoch', mode='synchronous') # Train Spark model spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0, diff --git a/tests/test_spark_model.py b/tests/test_spark_model.py index 0090f34..60d8d3a 100644 --- a/tests/test_spark_model.py +++ b/tests/test_spark_model.py @@ -48,38 +48,32 @@ model.add(Dense(10)) model.add(Activation('softmax')) -sgd = SGD(lr=0.1) +model.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["acc"]) def test_spark_model_end_to_end(spark_context): rdd = to_simple_rdd(spark_context, x_train, y_train) - adagrad = elephas_optimizers.Adagrad() - # sync epoch - spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', - mode='synchronous', num_workers=2, master_optimizer=sgd) + spark_model = SparkModel(model, frequency='epoch', mode='synchronous', num_workers=2) spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) score = 
spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) # sync batch - spark_model = SparkModel(model, optimizer=adagrad, frequency='batch', - mode='synchronous', num_workers=2, master_optimizer=sgd) + spark_model = SparkModel(model, frequency='batch', mode='synchronous', num_workers=2) spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) # async epoch - spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', - mode='asynchronous', num_workers=2, master_optimizer=sgd) + spark_model = SparkModel(model, frequency='epoch', mode='asynchronous') spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) # hogwild epoch - spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', - mode='hogwild', num_workers=2, master_optimizer=sgd) + spark_model = SparkModel(model, frequency='epoch', mode='hogwild') spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', score[1]) \ No newline at end of file From ae3b01d6fc680ec89ce287b74feea5fe51381b91 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:10:32 +0200 Subject: [PATCH 48/57] update api --- elephas/ml/params.py | 63 +++++++++++++++++++++++++++++++++++++----- elephas/ml_model.py | 29 +++++++++---------- elephas/spark_model.py | 61 +++++++++++++++++++--------------------- elephas/worker.py | 11 +++++--- 4 files changed, 105 insertions(+), 59 deletions(-) diff --git a/elephas/ml/params.py b/elephas/ml/params.py index b76c7ab..8e210aa 100644 --- a/elephas/ml/params.py +++ b/elephas/ml/params.py @@ -19,19 +19,22 @@ def get_keras_model_config(self): return self.getOrDefault(self.keras_model_config) -class HasOptimizerConfig(Params): +class HasElephasOptimizerConfig(Params): """Parameter mixin for Elephas optimizer config """ def __init__(self): - super(HasOptimizerConfig, self).__init__() - self.optimizer_config = Param(self, "optimizer_config", "Serialized Elephas optimizer properties") + super(HasElephasOptimizerConfig, self).__init__() + self.elephas_optimizer_config = Param(self, "elephas_optimizer_config", + "Serialized Elephas optimizer properties") + self._setDefault(elephas_optimizer_config=None) - def set_optimizer_config(self, optimizer_config): - self._paramMap[self.optimizer_config] = optimizer_config + + def set_elephas_optimizer_config(self, elephas_optimizer_config): + self._paramMap[self.elephas_optimizer_config] = elephas_optimizer_config return self - def get_optimizer_config(self): - return self.getOrDefault(self.optimizer_config) + def get_elephas_optimizer_config(self): + return self.getOrDefault(self.elephas_optimizer_config) class HasMode(Params): @@ -180,3 +183,49 @@ def set_num_workers(self, num_workers): def get_num_workers(self): return self.getOrDefault(self.num_workers) + + +class HasKerasOptimizerConfig(Params): + """Parameter mixin for Keras optimizer config + """ + def __init__(self): + super(HasKerasOptimizerConfig, self).__init__() + self.optimizer_config = Param(self, "optimizer_config", "Serialized Keras optimizer properties") + self._setDefault(optimizer_config=None) + + def set_optimizer_config(self, optimizer_config): + 
self._paramMap[self.optimizer_config] = optimizer_config + return self + + def get_optimizer_config(self): + return self.getOrDefault(self.optimizer_config) + + +class HasMetrics(Params): + """Parameter mixin for Keras metrics + """ + def __init__(self): + super(HasMetrics, self).__init__() + self.metrics = Param(self, "metrics", "Keras metrics") + + def set_metrics(self, metrics): + self._paramMap[self.metrics] = metrics + return self + + def get_metrics(self): + return self.getOrDefault(self.metrics) + + +class HasLoss(Params): + """Parameter mixin for Keras metrics + """ + def __init__(self): + super(HasLoss, self).__init__() + self.loss = Param(self, "loss", "Keras loss") + + def set_loss(self, loss): + self._paramMap[self.loss] = loss + return self + + def get_loss(self): + return self.getOrDefault(self.loss) diff --git a/elephas/ml_model.py b/elephas/ml_model.py index afedcc4..caeca00 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -8,27 +8,20 @@ from pyspark.sql.types import StringType, DoubleType, StructField from keras.models import model_from_yaml +from keras.optimizers import get as get_optimizer + from .spark_model import SparkModel from .utils.rdd_utils import from_vector from .ml.adapter import df_to_simple_rdd -from .ml.params import HasCategoricalLabels -from .ml.params import HasValidationSplit -from .ml.params import HasKerasModelConfig -from .ml.params import HasMode -from .ml.params import HasEpochs -from .ml.params import HasBatchSize -from .ml.params import HasFrequency -from .ml.params import HasVerbosity -from .ml.params import HasNumberOfClasses -from .ml.params import HasNumberOfWorkers -from .ml.params import HasOptimizerConfig +from .ml.params import * from .optimizers import get class ElephasEstimator(Estimator, HasCategoricalLabels, HasValidationSplit, HasKerasModelConfig, HasFeaturesCol, HasLabelCol, HasMode, HasEpochs, HasBatchSize, HasFrequency, HasVerbosity, HasNumberOfClasses, - HasNumberOfWorkers, HasOptimizerConfig, HasOutputCol): + HasNumberOfWorkers, HasElephasOptimizerConfig, HasOutputCol, HasLoss, + HasMetrics, HasKerasOptimizerConfig): """ SparkML Estimator implementation of an elephas model. This estimator takes all relevant arguments for model compilation and training. 
@@ -52,14 +45,18 @@ def _fit(self, df): simple_rdd = df_to_simple_rdd(df, categorical=self.get_categorical_labels(), nb_classes=self.get_nb_classes(), features_col=self.getFeaturesCol(), label_col=self.getLabelCol()) simple_rdd = simple_rdd.repartition(self.get_num_workers()) - optimizer = None - if self.get_optimizer_config() is not None: - optimizer = get({'class_name': self.get_optimizer_config()['class_name'], + elephas_optimizer = None + if self.get_elephas_optimizer_config() is not None: + elephas_optimizer = get({'class_name': self.get_optimizer_config()['class_name'], 'config': self.get_optimizer_config()}) keras_model = model_from_yaml(self.get_keras_model_config()) + metrics = self.get_metrics() + loss = self.get_loss() + optimizer = get_optimizer(self.get_optimizer_config()) + keras_model.compile(loss=loss, optimizer=optimizer, metrics=metrics) - spark_model = SparkModel(keras_model, optimizer=optimizer, + spark_model = SparkModel(model=keras_model, elephas_optimizer=elephas_optimizer, mode=self.get_mode(), frequency=self.get_frequency(), num_workers=self.get_num_workers()) spark_model.fit(simple_rdd, epochs=self.get_nb_epoch(), batch_size=self.get_batch_size(), diff --git a/elephas/spark_model.py b/elephas/spark_model.py index ddbf75d..38fb885 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -2,6 +2,7 @@ from __future__ import print_function import pyspark +from keras.optimizers import serialize as serialize_optimizer from .utils import lp_to_simple_rdd from .utils import model_to_dict @@ -14,44 +15,46 @@ class SparkModel(object): - def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', - num_workers=None, master_optimizer="sgd", master_loss="categorical_crossentropy", - master_metrics=None, custom_objects=None, parameter_server_mode='http', *args, **kwargs): + def __init__(self, model, mode='asynchronous', frequency='epoch', parameter_server_mode='http', num_workers=None, + elephas_optimizer=None, custom_objects=None, *args, **kwargs): """SparkModel Base class for distributed training on RDDs. Spark model takes a Keras model as master network, an optimization scheme, a parallelisation mode and an averaging frequency. 
- :param master_network: Keras model (not compiled) - :param optimizer: Elephas optimizer + :param model: Compiled Keras model :param mode: String, choose from `asynchronous`, `synchronous` and `hogwild` :param frequency: String, either `epoch` or `batch` + :param parameter_server_mode: String, either `http` or `socket` :param num_workers: int, number of workers used for training (defaults to None) - :param master_optimizer: Keras optimizer for master network - :param master_loss: Keras loss function for master network - :param master_metrics: Keras metrics used for master network + :param elephas_optimizer: Elephas optimizer :param custom_objects: Keras custom objects - :param parameter_server_mode: String, either `http` or `socket` """ - self._master_network = master_network + self._master_network = model + if not hasattr(model, "loss"): + raise Exception("Compile your Keras model before initializing an Elephas model with it") + metrics = model.metrics + loss = model.loss + optimizer = serialize_optimizer(model.optimizer) + if custom_objects is None: custom_objects = {} - if master_metrics is None: - master_metrics = ["accuracy"] - if optimizer is None: + if metrics is None: + metrics = ["accuracy"] + if elephas_optimizer is None: self.optimizer = SGD() else: - self.optimizer = optimizer + self.optimizer = elephas_optimizer self.mode = mode self.frequency = frequency self.num_workers = num_workers - self.weights = master_network.get_weights() + self.weights = self._master_network.get_weights() self.pickled_weights = None - self.master_optimizer = master_optimizer - self.master_loss = master_loss - self.master_metrics = master_metrics + self.master_optimizer = optimizer + self.master_loss = loss + self.master_metrics = metrics self.custom_objects = custom_objects self.parameter_server_mode = parameter_server_mode @@ -167,29 +170,23 @@ def _fit(self, rdd, epochs, batch_size, verbose, validation_split): class SparkMLlibModel(SparkModel): - def __init__(self, master_network, optimizer=None, mode='asynchronous', frequency='epoch', num_workers=4, - master_optimizer="adam", master_loss="categorical_crossentropy", - master_metrics=None, custom_objects=None, parameter_server_mode='http', - *args, **kwargs): + def __init__(self, model, mode='asynchronous', frequency='epoch', parameter_server_mode='http', + num_workers=4, elephas_optimizer=None, custom_objects=None, *args, **kwargs): """SparkMLlibModel The Spark MLlib model takes RDDs of LabeledPoints for training. 
- :param master_network: Keras model (not compiled) - :param optimizer: Elephas optimizer + :param model: Compiled Keras model :param mode: String, choose from `asynchronous`, `synchronous` and `hogwild` :param frequency: String, either `epoch` or `batch` + :param parameter_server_mode: String, either `http` or `socket` :param num_workers: int, number of workers used for training (defaults to None) - :param master_optimizer: Keras optimizer for master network - :param master_loss: Keras loss function for master network - :param master_metrics: Keras metrics used for master network + :param elephas_optimizer: Elephas optimizer :param custom_objects: Keras custom objects - :param parameter_server_mode: String, either `http` or `socket """ - SparkModel.__init__(self, master_network=master_network, optimizer=optimizer, mode=mode, frequency=frequency, - num_workers=num_workers, master_optimizer=master_optimizer, master_loss=master_loss, - master_metrics=master_metrics, custom_objects=custom_objects, - parameter_server_mode=parameter_server_mode, *args, **kwargs) + SparkModel.__init__(self, model=model, mode=mode, frequency=frequency, + parameter_server_mode=parameter_server_mode, num_workers=num_workers, + elephas_optimizer=elephas_optimizer, custom_objects=custom_objects, *args, **kwargs) def fit(self, labeled_points, epochs=10, batch_size=32, verbose=0, validation_split=0.1, categorical=False, nb_classes=None): diff --git a/elephas/worker.py b/elephas/worker.py index cf28b41..a04758a 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -2,6 +2,7 @@ from itertools import tee from keras.utils.generic_utils import slice_arrays from keras.models import model_from_yaml +from keras.optimizers import get as get_optimizer from .utils import subtract_params from .parameter import SocketClient, HttpClient @@ -15,7 +16,7 @@ def __init__(self, yaml, parameters, train_config, master_optimizer, self.yaml = yaml self.parameters = parameters self.train_config = train_config - self.master_optimizer = "sgd" # TODO + self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics self.custom_objects = custom_objects @@ -24,8 +25,9 @@ def __init__(self, yaml, parameters, train_config, master_optimizer, def train(self, data_iterator): """Train a keras model on a worker """ + optimizer = get_optimizer(self.master_optimizer) self.model = model_from_yaml(self.yaml, self.custom_objects) - self.model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + self.model.compile(optimizer=optimizer, loss=self.master_loss, metrics=self.master_metrics) self.model.set_weights(self.parameters.value) feature_iterator, label_iterator = tee(data_iterator, 2) @@ -57,7 +59,7 @@ def __init__(self, yaml, parameters, parameter_server_mode, train_config, freque self.train_config = train_config self.frequency = frequency - self.master_optimizer = "sgd" # TODO master_optimizer + self.master_optimizer = master_optimizer self.master_loss = master_loss self.master_metrics = master_metrics self.yaml = yaml @@ -76,8 +78,9 @@ def train(self, data_iterator): if x_train.size == 0: return + optimizer = get_optimizer(self.master_optimizer) self.model = model_from_yaml(self.yaml, self.custom_objects) - self.model.compile(optimizer=self.master_optimizer, loss=self.master_loss, metrics=self.master_metrics) + self.model.compile(optimizer=optimizer, loss=self.master_loss, metrics=self.master_metrics) self.model.set_weights(self.parameters.value) nb_epoch = 
self.train_config['nb_epoch'] From 79ba91ec543520140a20cfceb1eb9dfa752d122f Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:10:44 +0200 Subject: [PATCH 49/57] all examples run --- examples/ml_mlp.py | 21 ++++++++------------- examples/ml_pipeline_otto.py | 13 ++++++++++--- examples/mllib_mlp.py | 4 ++-- examples/mnist_mlp_spark.py | 5 ++--- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/examples/ml_mlp.py b/examples/ml_mlp.py index 2522e1b..52a9422 100644 --- a/examples/ml_mlp.py +++ b/examples/ml_mlp.py @@ -4,12 +4,11 @@ from keras.datasets import mnist from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import Adam from keras.utils import np_utils +from keras import optimizers from elephas.ml_model import ElephasEstimator from elephas.ml.adapter import to_data_frame -from elephas import optimizers as elephas_optimizers from pyspark import SparkContext, SparkConf from pyspark.mllib.evaluation import MulticlassMetrics @@ -47,11 +46,6 @@ model.add(Dense(10)) model.add(Activation('softmax')) - -# Compile model -adam = Adam() -model.compile(loss='categorical_crossentropy', optimizer=adam) - # Create Spark context conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') sc = SparkContext(conf=conf) @@ -60,17 +54,18 @@ df = to_data_frame(sc, x_train, y_train, categorical=True) test_df = to_data_frame(sc, x_test, y_test, categorical=True) -# Define elephas optimizer -adadelta = elephas_optimizers.Adadelta() +sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) +sgd_conf = optimizers.serialize(sgd) # Initialize Spark ML Estimator estimator = ElephasEstimator() estimator.set_keras_model_config(model.to_yaml()) -estimator.set_optimizer_config(adadelta.get_config()) +estimator.set_optimizer_config(sgd_conf) +estimator.set_mode("synchronous") +estimator.set_loss("categorical_crossentropy") +estimator.set_metrics(['acc']) estimator.set_nb_epoch(nb_epoch) estimator.set_batch_size(batch_size) -estimator.set_num_workers(1) -estimator.set_verbosity(0) estimator.set_validation_split(0.1) estimator.set_categorical_labels(True) estimator.set_nb_classes(nb_classes) @@ -84,7 +79,7 @@ pnl = prediction.select("label", "prediction") pnl.show(100) -prediction_and_label = pnl.map(lambda row: (row.label, row.prediction)) +prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction)) metrics = MulticlassMetrics(prediction_and_label) print(metrics.precision()) print(metrics.recall()) diff --git a/examples/ml_pipeline_otto.py b/examples/ml_pipeline_otto.py index cb18cb2..4fbd9db 100644 --- a/examples/ml_pipeline_otto.py +++ b/examples/ml_pipeline_otto.py @@ -10,8 +10,9 @@ from pyspark.ml.feature import StringIndexer, StandardScaler from pyspark.ml import Pipeline +from keras import optimizers from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation +from keras.layers import Dense, Dropout, Activation from elephas.ml_model import ElephasEstimator from elephas import optimizers as elephas_optimizers @@ -72,15 +73,21 @@ def load_data_rdd(csv_file, shuffle=True, train=True): model.compile(loss='categorical_crossentropy', optimizer='adam') +sgd = optimizers.SGD(lr=0.01) +sgd_conf = optimizers.serialize(sgd) # Initialize Elephas Spark ML Estimator adadelta = elephas_optimizers.Adadelta() estimator = ElephasEstimator() +estimator.set_keras_model_config(model.to_yaml()) +estimator.set_optimizer_config(sgd_conf) 
+estimator.set_mode("synchronous") +estimator.set_loss("categorical_crossentropy") +estimator.set_metrics(['acc']) estimator.setFeaturesCol("scaled_features") estimator.setLabelCol("index_category") -estimator.set_keras_model_config(model.to_yaml()) -estimator.set_optimizer_config(adadelta.get_config()) +estimator.set_elephas_optimizer_config(adadelta.get_config()) estimator.set_nb_epoch(10) estimator.set_batch_size(128) estimator.set_num_workers(1) diff --git a/examples/mllib_mlp.py b/examples/mllib_mlp.py index 18976b1..70b1df5 100644 --- a/examples/mllib_mlp.py +++ b/examples/mllib_mlp.py @@ -45,6 +45,7 @@ # Compile model rms = RMSprop() +model.compile(rms, "categorical_crossentropy", ['acc']) # Create Spark context conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') @@ -54,8 +55,7 @@ lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True) # Initialize SparkModel from Keras model and Spark context -spark_model = SparkMLlibModel(master_network=model, frequency='epoch', mode='synchronous', - master_metrics=['acc']) +spark_model = SparkMLlibModel(model=model, frequency='epoch', mode='synchronous') # Train Spark model spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0, diff --git a/examples/mnist_mlp_spark.py b/examples/mnist_mlp_spark.py index 3462de2..4975257 100644 --- a/examples/mnist_mlp_spark.py +++ b/examples/mnist_mlp_spark.py @@ -49,14 +49,13 @@ model.add(Activation('softmax')) sgd = SGD(lr=0.1) +model.compile(sgd, 'categorical_crossentropy', ['acc']) # Build RDD from numpy features and labels rdd = to_simple_rdd(sc, x_train, y_train) # Initialize SparkModel from Keras model and Spark context -adagrad = elephas_optimizers.Adagrad() -spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', - mode='asynchronous', num_workers=2, master_optimizer=sgd) +spark_model = SparkModel(model, frequency='epoch', mode='asynchronous') # Train Spark model spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1) From b0ae5959af085d761acf38e9c2de77b66b700f1d Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:11:08 +0200 Subject: [PATCH 50/57] update Docker --- Dockerfile | 113 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/Dockerfile b/Dockerfile index 61c3358..5d9810f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,54 +1,73 @@ -FROM gw000/keras:2.1.3-py3-tf-gpu -MAINTAINER gw0 [http://gw.tnode.com/] - -# install py3-tf-cpu/gpu (Python 3, TensorFlow, CPU/GPU) -RUN apt-get update -qq \ - && apt-get install --no-install-recommends -y \ - # install python 3 - python3 \ - python3-dev \ - python3-pip \ - python3-setuptools \ - python3-virtualenv \ - pkg-config \ - # requirements for numpy - libopenblas-base \ - python3-numpy \ - python3-scipy \ - # requirements for keras - python3-h5py \ - python3-yaml \ - python3-pydot \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -ARG TENSORFLOW_VERSION=1.10.0 -ARG TENSORFLOW_DEVICE=gpu -ARG TENSORFLOW_APPEND=_gpu -RUN pip3 --no-cache-dir install https://storage.googleapis.com/tensorflow/linux/${TENSORFLOW_DEVICE}/tensorflow${TENSORFLOW_APPEND}-${TENSORFLOW_VERSION}-cp35-cp35m-linux_x86_64.whl - -ARG KERAS_VERSION=2.1.3 -ENV KERAS_BACKEND=tensorflow -RUN pip3 --no-cache-dir install git+https://github.com/fchollet/keras.git@${KERAS_VERSION} - -# install additional debian packages -RUN apt-get update -qq \ - && apt-get install --no-install-recommends -y \ - # system tools - less \ - procps 
\ - vim-tiny \ - # build dependencies - build-essential \ - libffi-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +ARG cuda_version=9.0 +ARG cudnn_version=7 +FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel +# Install system packages +RUN apt-get update && apt-get install -y --no-install-recommends \ + bzip2 \ + g++ \ + git \ + graphviz \ + libgl1-mesa-glx \ + libhdf5-dev \ + openmpi-bin \ + wget && \ + rm -rf /var/lib/apt/lists/* + +# Install conda +ENV CONDA_DIR /opt/conda +ENV PATH $CONDA_DIR/bin:$PATH + +RUN wget --quiet --no-check-certificate https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \ + echo "c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a *Miniconda3-4.2.12-Linux-x86_64.sh" | sha256sum -c - && \ + /bin/bash /Miniconda3-4.2.12-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ + rm Miniconda3-4.2.12-Linux-x86_64.sh && \ + echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh + +# Install Python packages and keras +ENV NB_USER keras +ENV NB_UID 1000 + +RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \ + chown $NB_USER $CONDA_DIR -R && \ + mkdir -p /src && \ + chown $NB_USER /src + +USER $NB_USER + +ARG python_version=2.7 + +RUN conda install -y python=${python_version} && \ + pip install --upgrade pip && \ + pip install \ + sklearn_pandas \ + tensorflow-gpu && \ + conda install \ + bcolz \ + h5py \ + matplotlib \ + mkl \ + nose \ + notebook \ + Pillow \ + pandas \ + pygpu \ + pyyaml \ + scikit-learn \ + six \ + conda clean -yt + +ENV PYTHONPATH='/src/:$PYTHONPATH' RUN mkdir -p app WORKDIR /app COPY ./requirements.txt /app # Install requirements -RUN pip3 install -r ./requirements.txt -RUN pip3 install git+https://github.com/hyperopt/hyperopt.git \ No newline at end of file +RUN pip install -r ./requirements.txt +RUN pip install git+https://github.com/hyperopt/hyperopt.git + + +EXPOSE 8888 + +CMD jupyter notebook --port=8888 --ip=0.0.0.0 \ No newline at end of file From c6ba936cae1b227d4861bfd278dc83c460640dd6 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:11:19 +0200 Subject: [PATCH 51/57] update readme --- README.md | 129 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index cb760cd..e9cd3df 100644 --- a/README.md +++ b/README.md @@ -33,28 +33,39 @@ Table of content: - [Literature](#literature) ## Introduction -Elephas brings deep learning with [Keras](http://keras.io) to [Spark](http://spark.apache.org). Elephas intends to keep the simplicity and high usability of Keras, thereby allowing for fast prototyping of distributed models, which can be run on massive data sets. For an introductory example, see the following [iPython notebook](https://github.com/maxpumperla/elephas/blob/master/examples/Spark_ML_Pipeline.ipynb). - -ἐλέφας is Greek for _ivory_ and an accompanying project to κέρας, meaning _horn_. If this seems weird mentioning, like a bad dream, you should confirm it actually is at the [Keras documentation](https://github.com/fchollet/keras/blob/master/README.md). Elephas also means _elephant_, as in stuffed yellow elephant. - -Elephas implements a class of data-parallel algorithms on top of Keras, using Spark's RDDs and data frames. Keras Models are initialized on the driver, then serialized and shipped to workers, alongside with data and broadcasted model parameters. Spark workers deserialize the model, train their chunk of data and send their gradients back to the driver. 
The "master" model on the driver is updated by an optimizer, which takes gradients either synchronously or asynchronously. +Elephas brings deep learning with [Keras](http://keras.io) to [Spark](http://spark.apache.org). Elephas intends to +keep the simplicity and high usability of Keras, thereby allowing for fast prototyping of distributed models, which +can be run on massive data sets. For an introductory example, see the following +[iPython notebook](https://github.com/maxpumperla/elephas/blob/master/examples/Spark_ML_Pipeline.ipynb). + +ἐλέφας is Greek for _ivory_ and an accompanying project to κέρας, meaning _horn_. If this seems weird mentioning, like +a bad dream, you should confirm it actually is at the +[Keras documentation](https://github.com/fchollet/keras/blob/master/README.md). +Elephas also means _elephant_, as in stuffed yellow elephant. + +Elephas implements a class of data-parallel algorithms on top of Keras, using Spark's RDDs and data frames. +Keras Models are initialized on the driver, then serialized and shipped to workers, alongside with data and broadcasted +model parameters. Spark workers deserialize the model, train their chunk of data and send their gradients back to the +driver. The "master" model on the driver is updated by an optimizer, which takes gradients either synchronously or +asynchronously. ## Getting started ### Installation -Install elephas from PyPI with, Spark will be installed through `pyspark` for you. +Just install elephas from PyPI with, Spark will be installed through `pyspark` for you. + ``` pip install elephas ``` - ### Using Docker -Install and get Docker running by following the instructions here (https://www.docker.com/). +Install and get Docker running by following the [instructions here](https://www.docker.com/). #### Building -The build takes quite a while to run the first time since many packages need to be downloaded and installed. In the same directory as the ```Dockerfile``` run the following commands +The build takes quite a while to run the first time since many packages need to be downloaded and installed. In the +same directory as the ```Dockerfile``` run the following commands ``` docker build . -t pyspark/elephas @@ -62,7 +73,8 @@ docker build . -t pyspark/elephas #### Running -The following command starts a container with the Notebook server listening for HTTP connections on port 8899 (since local Jupyter notebooks use 8888) without authentication configured. +The following command starts a container with the Notebook server listening for HTTP connections on port +8899 (since local Jupyter notebooks use 8888) without authentication configured. ``` docker run -d -p 8899:8888 pyspark/elephas @@ -113,29 +125,36 @@ from elephas.utils.rdd_utils import to_simple_rdd rdd = to_simple_rdd(sc, X_train, Y_train) ``` -- A SparkModel is defined by passing Spark context and Keras model. Additionally, one has choose an optimizer used for updating the elephas model, an update frequency, a parallelization mode and the degree of parallelism, i.e. the number of workers. +- A SparkModel is defined by passing Spark context and Keras model. Additionally, one has choose an optimizer used for +updating the elephas model, an update frequency, a parallelization mode and the degree of parallelism, i.e. the number +of workers. 
```python from elephas.spark_model import SparkModel from elephas import optimizers as elephas_optimizers adagrad = elephas_optimizers.Adagrad() -spark_model = SparkModel(sc,model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=2) -spark_model.train(rdd, nb_epoch=20, batch_size=32, verbose=0, validation_split=0.1) +spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=2) +spark_model.train(rdd, epochs=20, batch_size=32, verbose=0, validation_split=0.1) ``` - Run your script using spark-submit ``` spark-submit --driver-memory 1G ./your_script.py ``` -Increasing the driver memory even further may be necessary, as the set of parameters in a network may be very large and collecting them on the driver eats up a lot of resources. See the examples folder for a few working examples. +Increasing the driver memory even further may be necessary, as the set of parameters in a network may be very large +and collecting them on the driver eats up a lot of resources. See the examples folder for a few working examples. ### Spark MLlib example -Following up on the last example, to create an RDD of LabeledPoints for supervised training from pairs of numpy arrays, use +Following up on the last example, to create an RDD of LabeledPoints for supervised training from pairs of +numpy arrays, use + ```python from elephas.utils.rdd_utils import to_labeled_point lp_rdd = to_labeled_point(sc, X_train, Y_train, categorical=True) ``` + Training a given LabeledPoint-RDD is very similar to what we've seen already + ```python from elephas.spark_model import SparkMLlibModel adadelta = elephas_optimizers.Adadelta() @@ -157,9 +176,10 @@ estimator = ElephasEstimator(sc,model, fitted_model = estimator.fit(df) ``` -Fitting an estimator results in a SparkML transformer, which we can use for predictions and other evaluations by calling the transform method on it. +Fitting an estimator results in a SparkML transformer, which we can use for predictions and other evaluations by +calling the transform method on it. -``` python +```python prediction = fitted_model.transform(test_df) pnl = prediction.select("label", "prediction") pnl.show(100) @@ -177,11 +197,13 @@ In the first example above we have seen that an elephas model is instantiated li ```python spark_model = SparkModel(sc,model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=2) ``` -So, apart from the canonical Spark context and Keras model, Elephas models have four parameters to tune and we will describe each of them next. +So, apart from the canonical Spark context and Keras model, Elephas models have four parameters to tune and +we will describe each of them next. ### Model updates (optimizers) -`optimizer`: The optimizers module in elephas is an adaption of the same module in keras, i.e. it provides the user with the following list of optimizers: +`optimizer`: The optimizers module in elephas is an adaption of the same module in keras, i.e. it provides the +user with the following list of optimizers: - `SGD` - `RMSprop` @@ -189,38 +211,59 @@ So, apart from the canonical Spark context and Keras model, Elephas models have - `Adadelta` - `Adam` -Once constructed, each of these can be passed to the *optimizer* parameter of the model. Updates in keras are computed with the help of theano, so most of the data structures in keras optimizers stem from theano. 
In elephas, gradients have already been computed by the respective workers, so it makes sense to entirely work with numpy arrays internally. +Once constructed, each of these can be passed to the *optimizer* parameter of the model. Updates in keras are +computed with the help of theano, so most of the data structures in keras optimizers stem from theano. In +elephas, gradients have already been computed by the respective workers, so it makes sense to entirely work +with numpy arrays internally. -Note that in order to set up an elephas model, you have to specify two optimizers, one for elephas and one for the underlying keras model. Individual workers produce updates according to keras optimizers and the "master" model on the driver uses elephas optimizers to aggregate them. For starters, we recommend keras models with SGD and elephas models with Adagrad or Adadelta. +Note that in order to set up an elephas model, you have to specify two optimizers, one for elephas and one for the +underlying keras model. Individual workers produce updates according to keras optimizers and the "master" model on the +driver uses elephas optimizers to aggregate them. For starters, we recommend keras models with SGD and elephas models +with Adagrad or Adadelta. ### Update frequency -`frequency`: The user can decide how often updates are passed to the master model by controlling the *frequency* parameter. To update every batch, choose 'batch' and to update only after every epoch, choose 'epoch'. +`frequency`: The user can decide how often updates are passed to the master model by controlling the *frequency* +parameter. To update every batch, choose 'batch' and to update only after every epoch, choose 'epoch'. ### Update mode -`mode`: Currently, there's three different modes available in elephas, each corresponding to a different heuristic or parallelization scheme adopted, which is controlled by the *mode* parameter. The default property is 'asynchronous'. +`mode`: Currently, there's three different modes available in elephas, each corresponding to a different heuristic or +parallelization scheme adopted, which is controlled by the *mode* parameter. The default property is 'asynchronous'. #### Asynchronous updates with read and write locks (`mode='asynchronous'`) -This mode implements the algorithm described as *downpour* in [1], i.e. each worker can send updates whenever they are ready. The master model makes sure that no update gets lost, i.e. multiple updates get applied at the "same" time, by locking the master parameters while reading and writing parameters. This idea has been used in Google's DistBelief framework. +This mode implements the algorithm described as *downpour* in [1], i.e. each worker can send updates whenever they +are ready. The master model makes sure that no update gets lost, i.e. multiple updates get applied at the "same" time, +by locking the master parameters while reading and writing parameters. This idea has been used in Google's DistBelief +framework. #### Asynchronous updates without locks (`mode='hogwild'`) -Essentially the same procedure as above, but without requiring the locks. This heuristic assumes that we still fare well enough, even if we loose an update here or there. Updating parameters lock-free in a non-distributed setting for SGD goes by the name 'Hogwild!' [2], it's distributed extension is called 'Dogwild!' [3]. +Essentially the same procedure as above, but without requiring the locks. 
This heuristic assumes that we still fare
+well enough, even if we lose an update here or there. Updating parameters lock-free in a non-distributed setting
+for SGD goes by the name 'Hogwild!' [2], its distributed extension is called 'Dogwild!' [3].
 
 #### Synchronous updates (`mode='synchronous'`)
 
-In this mode each worker sends a new batch of parameter updates at the same time, which are then processed on the master. Accordingly, this algorithm is sometimes called *batch synchronous parallel* or just BSP.
+In this mode each worker sends a new batch of parameter updates at the same time, which are then processed on the
+master. Accordingly, this algorithm is sometimes called *batch synchronous parallel* or just BSP.
 
 ### Degree of parallelization (number of workers)
 
-`num_workers`: Lastly, the degree to which we parallelize our training data is controlled by the parameter *num_workers*.
+`num_workers`: Lastly, the degree to which we parallelize our training data is controlled by the
+parameter *num_workers*.
 
 ## Distributed hyper-parameter optimization
 
-Hyper-parameter optimization with elephas is based on [hyperas](https://github.com/maxpumperla/hyperas), a convenience wrapper for hyperopt and keras. Make sure to have at least version ```0.1.2``` of hyperas installed. Each Spark worker executes a number of trials, the results get collected and the best model is returned. As the distributed mode in hyperopt (using MongoDB), is somewhat difficult to configure and error prone at the time of writing, we chose to implement parallelization ourselves. Right now, the only available optimization algorithm is random search.
+Hyper-parameter optimization with elephas is based on [hyperas](https://github.com/maxpumperla/hyperas), a convenience
+wrapper for hyperopt and keras. Each Spark worker executes a number of trials, the results get collected and the best
+model is returned. As the distributed mode in hyperopt (using MongoDB) is somewhat difficult to configure and error
+prone at the time of writing, we chose to implement parallelization ourselves. Right now, the only available
+optimization algorithm is random search.
 
-The first part of this example is more or less directly taken from the hyperas documentation. We define data and model as functions, hyper-parameter ranges are defined through braces. See the hyperas documentation for more on how this works.
+The first part of this example is more or less directly taken from the hyperas documentation. We define data and model
+as functions, hyper-parameter ranges are defined through braces. See the hyperas documentation for more on how
+this works.
 
 ```python
 from __future__ import print_function
@@ -307,7 +350,9 @@ hyperparam_model.minimize(model=model, data=data, max_evals=5)
 
 ## Distributed training of ensemble models
 
-Building on the last section, it is possible to train ensemble models with elephas by means of running hyper-parameter optimization on large search spaces and defining a resulting voting classifier on the top-n performing models. With ```data``` and ```model``` defined as above, this is a simple as running
+Building on the last section, it is possible to train ensemble models with elephas by means of running hyper-parameter
+optimization on large search spaces and defining a resulting voting classifier on the top-n performing models. 
+With ```data``` and ```model``` defined as above, this is a simple as running ```python result = hyperparam_model.best_ensemble(nb_ensemble_models=10, model=model, data=data, max_evals=5) @@ -316,21 +361,23 @@ In this example an ensemble of 10 models is built, based on optimization of at m ## Discussion -Premature parallelization may not be the root of all evil, but it may not always be the best idea to do so. Keep in mind that more workers mean less data per worker and parallelizing a model is not an excuse for actual learning. So, if you can perfectly well fit your data into memory *and* you're happy with training speed of the model consider just using keras. - -One exception to this rule may be that you're already working within the Spark ecosystem and want to leverage what's there. The above SparkML example shows how to use evaluation modules from Spark and maybe you wish to further process the outcome of an elephas model down the road. In this case, we recommend to use elephas as a simple wrapper by setting num_workers=1. - -Note that right now elephas restricts itself to data-parallel algorithms for two reasons. First, Spark simply makes it very easy to distribute data. Second, neither Spark nor Theano make it particularly easy to split up the actual model in parts, thus making model-parallelism practically impossible to realize. - -Having said all that, we hope you learn to appreciate elephas as a pretty easy to setup and use playground for data-parallel deep-learning algorithms. +Premature parallelization may not be the root of all evil, but it may not always be the best idea to do so. Keep in +mind that more workers mean less data per worker and parallelizing a model is not an excuse for actual learning. +So, if you can perfectly well fit your data into memory *and* you're happy with training speed of the model consider +just using keras. +One exception to this rule may be that you're already working within the Spark ecosystem and want to leverage what's +there. The above SparkML example shows how to use evaluation modules from Spark and maybe you wish to further process +the outcome of an elephas model down the road. In this case, we recommend to use elephas as a simple wrapper by setting +num_workers=1. -## Future work & contributions +Note that right now elephas restricts itself to data-parallel algorithms for two reasons. First, Spark simply makes it +very easy to distribute data. Second, neither Spark nor Theano make it particularly easy to split up the actual model +in parts, thus making model-parallelism practically impossible to realize. -Constructive feedback and pull requests for elephas are very welcome. Here's a few things we're having in mind for future development +Having said all that, we hope you learn to appreciate elephas as a pretty easy to setup and use playground for +data-parallel deep-learning algorithms. -- Benchmarks for training speed and accuracy. -- Some real-world tests on EC2 instances with large data sets like imagenet. ## Literature [1] J. Dean, G.S. Corrado, R. Monga, K. Chen, M. Devin, QV. Le, MZ. Mao, M’A. Ranzato, A. Senior, P. Tucker, K. Yang, and AY. Ng. [Large Scale Distributed Deep Networks](http://research.google.com/archive/large_deep_networks_nips2012.html). 
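
A minimal, self-contained sketch of the workflow the README and tests above exercise: compile a Keras model, wrap it in `SparkModel`, fit it on an RDD built with `to_simple_rdd`, and evaluate the underlying `master_network`. The MNIST subset size, the hyper-parameter values and the `local[2]` Spark master are illustrative assumptions, not values taken from the patches.

```python
# Illustrative sketch only: train a compiled Keras model on an RDD with Elephas.
from pyspark import SparkContext, SparkConf

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.utils import np_utils

from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd

conf = SparkConf().setAppName('elephas_sketch').setMaster('local[2]')
sc = SparkContext(conf=conf)

# Prepare a small MNIST subset as numpy arrays (sizes chosen arbitrarily).
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32')[:1000] / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
y_train = np_utils.to_categorical(y_train, 10)[:1000]
y_test = np_utils.to_categorical(y_test, 10)

# The Keras model must be compiled before it is handed to SparkModel.
model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['acc'])

# Distribute the numpy data as an RDD and train.
rdd = to_simple_rdd(sc, x_train, y_train)
spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
spark_model.fit(rdd, epochs=5, batch_size=32, verbose=0, validation_split=0.1)

# Evaluate the trained master network with plain Keras.
score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', score[1])
```

As with the examples above, such a script would be launched with `spark-submit`.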
From a07e656f7c3524855ae98021f012c9a85ce0b090 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 12:44:29 +0200 Subject: [PATCH 52/57] all examples run --- elephas/ml/adapter.py | 7 +++++-- examples/ml_pipeline_otto.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/elephas/ml/adapter.py b/elephas/ml/adapter.py index ecc9840..80b20fb 100644 --- a/elephas/ml/adapter.py +++ b/elephas/ml/adapter.py @@ -3,7 +3,7 @@ from pyspark.sql import SQLContext from pyspark.mllib.regression import LabeledPoint from ..utils.rdd_utils import from_labeled_point, to_labeled_point, lp_to_simple_rdd - +from pyspark.mllib.linalg import Vector as MLLibVector, Vectors as MLLibVectors def to_data_frame(sc, features, labels, categorical=False): """Convert numpy arrays of features and labels into Spark DataFrame @@ -29,6 +29,9 @@ def df_to_simple_rdd(df, categorical=False, nb_classes=None, features_col='featu sql_context.registerDataFrameAsTable(df, "temp_table") selected_df = sql_context.sql( "SELECT {0} AS features, {1} as label from temp_table".format(features_col, label_col)) - lp_rdd = selected_df.rdd.map(lambda row: LabeledPoint(row.label, row.features)) + if isinstance(selected_df.first().features, MLLibVector): + lp_rdd = selected_df.rdd.map(lambda row: LabeledPoint(row.label, row.features)) + else: + lp_rdd = selected_df.rdd.map(lambda row: LabeledPoint(row.label, MLLibVectors.fromML(row.features))) rdd = lp_to_simple_rdd(lp_rdd, categorical, nb_classes) return rdd diff --git a/examples/ml_pipeline_otto.py b/examples/ml_pipeline_otto.py index 4fbd9db..ed531f5 100644 --- a/examples/ml_pipeline_otto.py +++ b/examples/ml_pipeline_otto.py @@ -1,7 +1,7 @@ from __future__ import print_function from __future__ import absolute_import -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors import numpy as np import random @@ -17,7 +17,7 @@ from elephas.ml_model import ElephasEstimator from elephas import optimizers as elephas_optimizers -data_path = "./" +data_path = "../" # Spark contexts conf = SparkConf().setAppName('Otto_Spark_ML_Pipeline').setMaster('local[8]') @@ -34,7 +34,7 @@ def shuffle_csv(csv_file): def load_data_rdd(csv_file, shuffle=True, train=True): if shuffle: - shuffle_csv(csv_file) + shuffle_csv(data_path + csv_file) data = sc.textFile(data_path + csv_file) data = data.filter(lambda x: x.split(',')[0] != 'id').map(lambda line: line.split(',')) if train: From b3da15f29e325bab0937cedd1605fe22ecd7965b Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 13:54:52 +0200 Subject: [PATCH 53/57] remove legacy nb_epochs --- elephas/ml/params.py | 12 ++++++------ elephas/ml_model.py | 2 +- elephas/spark_model.py | 4 ++-- elephas/worker.py | 10 ++++------ examples/ml_mlp.py | 4 ++-- examples/ml_pipeline_otto.py | 2 +- tests/ml/test_params.py | 6 +++--- tests/test_ml_model.py | 5 ++--- 8 files changed, 21 insertions(+), 24 deletions(-) diff --git a/elephas/ml/params.py b/elephas/ml/params.py index 8e210aa..b952798 100644 --- a/elephas/ml/params.py +++ b/elephas/ml/params.py @@ -110,15 +110,15 @@ class HasEpochs(Params): """ def __init__(self): super(HasEpochs, self).__init__() - self.nb_epoch = Param(self, "nb_epoch", "Number of epochs to train") - self._setDefault(nb_epoch=10) + self.epochs = Param(self, "epochs", "Number of epochs to train") + self._setDefault(epochs=10) - def set_nb_epoch(self, nb_epoch): - self._paramMap[self.nb_epoch] = nb_epoch + def set_epochs(self, epochs): + self._paramMap[self.epochs] = epochs return self 
- def get_nb_epoch(self): - return self.getOrDefault(self.nb_epoch) + def get_epochs(self): + return self.getOrDefault(self.epochs) class HasBatchSize(Params): diff --git a/elephas/ml_model.py b/elephas/ml_model.py index caeca00..1a572f6 100644 --- a/elephas/ml_model.py +++ b/elephas/ml_model.py @@ -59,7 +59,7 @@ def _fit(self, df): spark_model = SparkModel(model=keras_model, elephas_optimizer=elephas_optimizer, mode=self.get_mode(), frequency=self.get_frequency(), num_workers=self.get_num_workers()) - spark_model.fit(simple_rdd, epochs=self.get_nb_epoch(), batch_size=self.get_batch_size(), + spark_model.fit(simple_rdd, epochs=self.get_epochs(), batch_size=self.get_batch_size(), verbose=self.get_verbosity(), validation_split=self.get_validation_split()) model_weights = spark_model.master_network.get_weights() diff --git a/elephas/spark_model.py b/elephas/spark_model.py index 38fb885..9db0005 100644 --- a/elephas/spark_model.py +++ b/elephas/spark_model.py @@ -70,8 +70,8 @@ def __init__(self, model, mode='asynchronous', frequency='epoch', parameter_ser "got {}".format(self.parameter_server_mode)) @staticmethod - def get_train_config(nb_epoch, batch_size, verbose, validation_split): - return {'nb_epoch': nb_epoch, + def get_train_config(epochs, batch_size, verbose, validation_split): + return {'epochs': epochs, 'batch_size': batch_size, 'verbose': verbose, 'validation_split': validation_split} diff --git a/elephas/worker.py b/elephas/worker.py index a04758a..437d4b7 100644 --- a/elephas/worker.py +++ b/elephas/worker.py @@ -83,7 +83,7 @@ def train(self, data_iterator): self.model.compile(optimizer=optimizer, loss=self.master_loss, metrics=self.master_metrics) self.model.set_weights(self.parameters.value) - nb_epoch = self.train_config['nb_epoch'] + epochs = self.train_config['epochs'] batch_size = self.train_config.get('batch_size') nb_train_sample = x_train.shape[0] nb_batch = int(np.ceil(nb_train_sample / float(batch_size))) @@ -94,20 +94,18 @@ def train(self, data_iterator): ] if self.frequency == 'epoch': - for epoch in range(nb_epoch): + for epoch in range(epochs): weights_before_training = self.client.get_parameters() self.model.set_weights(weights_before_training) self.train_config['epochs'] = 1 - self.train_config['nb_epoch'] = 1 # legacy support if x_train.shape[0] > batch_size: self.model.fit(x_train, y_train, **self.train_config) - self.train_config['epochs'] = nb_epoch - self.train_config['nb_epoch'] = nb_epoch + self.train_config['epochs'] = epochs weights_after_training = self.model.get_weights() deltas = subtract_params(weights_before_training, weights_after_training) self.client.update_parameters(deltas) elif self.frequency == 'batch': - for epoch in range(nb_epoch): + for epoch in range(epochs): if x_train.shape[0] > batch_size: for (batch_start, batch_end) in batches: weights_before_training = self.client.get_parameters() diff --git a/examples/ml_mlp.py b/examples/ml_mlp.py index 52a9422..6c7b029 100644 --- a/examples/ml_mlp.py +++ b/examples/ml_mlp.py @@ -18,7 +18,7 @@ # Define basic parameters batch_size = 64 nb_classes = 10 -nb_epoch = 1 +epochs = 1 # Load data (x_train, y_train), (x_test, y_test) = mnist.load_data() @@ -64,7 +64,7 @@ estimator.set_mode("synchronous") estimator.set_loss("categorical_crossentropy") estimator.set_metrics(['acc']) -estimator.set_nb_epoch(nb_epoch) +estimator.set_epochs(epochs) estimator.set_batch_size(batch_size) estimator.set_validation_split(0.1) estimator.set_categorical_labels(True) diff --git a/examples/ml_pipeline_otto.py 
b/examples/ml_pipeline_otto.py index ed531f5..c515c6b 100644 --- a/examples/ml_pipeline_otto.py +++ b/examples/ml_pipeline_otto.py @@ -88,7 +88,7 @@ def load_data_rdd(csv_file, shuffle=True, train=True): estimator.setFeaturesCol("scaled_features") estimator.setLabelCol("index_category") estimator.set_elephas_optimizer_config(adadelta.get_config()) -estimator.set_nb_epoch(10) +estimator.set_epochs(10) estimator.set_batch_size(128) estimator.set_num_workers(1) estimator.set_verbosity(0) diff --git a/tests/ml/test_params.py b/tests/ml/test_params.py index 0b13284..5a51ee1 100644 --- a/tests/ml/test_params.py +++ b/tests/ml/test_params.py @@ -56,10 +56,10 @@ def test_has_categorical_labels(): def test_has_epochs(): param = HasEpochs() - assert param.get_nb_epoch() == 10 + assert param.get_epochs() == 10 epochs = 42 - param.set_nb_epoch(epochs) - assert param.get_nb_epoch() == epochs + param.set_epochs(epochs) + assert param.get_epochs() == epochs def test_has_batch_size(): diff --git a/tests/test_ml_model.py b/tests/test_ml_model.py index 9cd4b5a..9656180 100644 --- a/tests/test_ml_model.py +++ b/tests/test_ml_model.py @@ -10,7 +10,6 @@ from elephas.ml_model import ElephasEstimator from elephas.ml.adapter import to_data_frame -from pyspark import SparkContext, SparkConf from pyspark.mllib.evaluation import MulticlassMetrics from pyspark.ml import Pipeline @@ -20,7 +19,7 @@ # Define basic parameters batch_size = 64 nb_classes = 10 -nb_epoch = 1 +epochs = 1 # Load data (x_train, y_train), (x_test, y_test) = mnist.load_data() @@ -64,7 +63,7 @@ def test_spark_ml_model(spark_context): estimator.set_mode("synchronous") estimator.set_loss("categorical_crossentropy") estimator.set_metrics(['acc']) - estimator.set_nb_epoch(nb_epoch) + estimator.set_epochs(epochs) estimator.set_batch_size(batch_size) estimator.set_validation_split(0.1) estimator.set_categorical_labels(True) From 93b85074e27fc5e757260579e24dc9486dc87bb8 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 14:30:11 +0200 Subject: [PATCH 54/57] update readme --- README.md | 250 ++++++++++++++---------------------------------------- 1 file changed, 66 insertions(+), 184 deletions(-) diff --git a/README.md b/README.md index e9cd3df..3723b0b 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # Elephas: Distributed Deep Learning with Keras & Spark [![Build Status](https://travis-ci.org/maxpumperla/elephas.svg?branch=master)](https://travis-ci.org/maxpumperla/elephas) -Elephas is an extension of [Keras](http://keras.io), which allows you to run distributed deep learning models at scale with [Spark](http://spark.apache.org). Elephas currently supports a number of applications, including: +Elephas is an extension of [Keras](http://keras.io), which allows you to run distributed deep learning models at +scale with [Spark](http://spark.apache.org). Elephas currently supports a number of +applications, including: -- [Data-parallel training of deep learning models](#usage-of-data-parallel-models) +- [Data-parallel training of deep learning models](#basic-spark-integration) - [Distributed hyper-parameter optimization](#distributed-hyper-parameter-optimization) - [Distributed training of ensemble models](#distributed-training-of-ensemble-models) @@ -12,25 +14,18 @@ Schematically, elephas works as follows. 
![Elephas](elephas.gif) Table of content: -- [Elephas: Distributed Deep Learning with Keras & Spark](#elephas-distributed-deep-learning-with-keras-&-spark-) - - [Introduction](#introduction) - - [Getting started](#getting-started) - - [Installation](#installation) - - [Basic example](#basic-example) - - [Spark ML example](#spark-ml-example) - - [Usage of data-parallel models](#usage-of-data-parallel-models) - - [Model updates (optimizers)](#model-updates-optimizers) - - [Update frequency](#update-frequency) - - [Update mode](#update-mode) - - [Asynchronous updates with read and write locks (`mode='asynchronous'`)](#asynchronous-updates-with-read-and-write-locks-modeasynchronous) - - [Asynchronous updates without locks (`mode='hogwild'`)](#asynchronous-updates-without-locks-modehogwild) - - [Synchronous updates (`mode='synchronous'`)](#synchronous-updates-modesynchronous) - - [Degree of parallelization (number of workers)](#degree-of-parallelization-number-of-workers) - - [Distributed hyper-parameter optimization](#distributed-hyper-parameter-optimization) - - [Distributed training of ensemble models](#distributed-training-of-ensemble-models) - - [Discussion](#discussion) - - [Future work & contributions](#future-work-&-contributions) - - [Literature](#literature) +* [Elephas: Distributed Deep Learning with Keras & Spark](#elephas-distributed-deep-learning-with-keras-&-spark-) + * [Introduction](#introduction) + * [Getting started](#getting-started) + * [Basic Spark integration](#basic-spark-integration) + * [Spark MLlib integration](#spark-mllib-integration) + * [Spark ML integration](#spark-ml-integration) + * [Distributed hyper-parameter optimization](#distributed-hyper-parameter-optimization) + * [Distributed training of ensemble models](#distributed-training-of-ensemble-models) + * [Discussion](#discussion) + * [Literature](#literature) + + ## Introduction Elephas brings deep learning with [Keras](http://keras.io) to [Spark](http://spark.apache.org). Elephas intends to @@ -51,58 +46,24 @@ asynchronously. ## Getting started -### Installation Just install elephas from PyPI with, Spark will be installed through `pyspark` for you. ``` pip install elephas ``` -### Using Docker - -Install and get Docker running by following the [instructions here](https://www.docker.com/). - -#### Building - -The build takes quite a while to run the first time since many packages need to be downloaded and installed. In the -same directory as the ```Dockerfile``` run the following commands - -``` -docker build . -t pyspark/elephas -``` - -#### Running - -The following command starts a container with the Notebook server listening for HTTP connections on port -8899 (since local Jupyter notebooks use 8888) without authentication configured. - -``` -docker run -d -p 8899:8888 pyspark/elephas -``` +That's it, you should now be able to run Elephas examples. -#### Settings +## Basic Spark integration -- Memory -In the ```Dockerfile``` the following lines can be adjusted to configure memory settings. - -``` -ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info -``` - -- Other -Other settings / configurations can be examined here https://github.com/kmader/docker-stacks/tree/master/keras-spark-notebook - -### Basic example -After installing both Elephas and Spark, training a model is done schematically as follows: - -- Create a local pyspark context +After installing both Elephas, you can train a model as follows. 
First, create a local pyspark context ```python from pyspark import SparkContext, SparkConf conf = SparkConf().setAppName('Elephas_App').setMaster('local[8]') sc = SparkContext(conf=conf) ``` -- Define and compile a Keras model +Next, you define and compile a Keras model ```python from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation @@ -119,60 +80,61 @@ model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer=SGD()) ``` -- Create an RDD from numpy arrays +and create an RDD from numpy arrays (or however you want to create an RDD) ```python from elephas.utils.rdd_utils import to_simple_rdd -rdd = to_simple_rdd(sc, X_train, Y_train) +rdd = to_simple_rdd(sc, x_train, y_train) ``` -- A SparkModel is defined by passing Spark context and Keras model. Additionally, one has choose an optimizer used for -updating the elephas model, an update frequency, a parallelization mode and the degree of parallelism, i.e. the number -of workers. +The basic model in Elephas is the `SparkModel`. You initialize a `SparkModel` by passing in a compiled Keras model, +an update frequency and a parallelization mode. After that you can simply `fit` the model on your RDD. Elephas `fit` +has the same options as a Keras model, so you can pass `epochs`, `batch_size` etc. as you're used to from Keras. + ```python from elephas.spark_model import SparkModel -from elephas import optimizers as elephas_optimizers -adagrad = elephas_optimizers.Adagrad() -spark_model = SparkModel(model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=2) -spark_model.train(rdd, epochs=20, batch_size=32, verbose=0, validation_split=0.1) +spark_model = SparkModel(model, frequency='epoch', mode='asynchronous') +spark_model.fit(rdd, epochs=20, batch_size=32, verbose=0, validation_split=0.1) ``` -- Run your script using spark-submit -``` +Your script can now be run using spark-submit +```bash spark-submit --driver-memory 1G ./your_script.py ``` + Increasing the driver memory even further may be necessary, as the set of parameters in a network may be very large and collecting them on the driver eats up a lot of resources. See the examples folder for a few working examples. -### Spark MLlib example -Following up on the last example, to create an RDD of LabeledPoints for supervised training from pairs of -numpy arrays, use + +## Spark MLlib integration + +Following up on the last example, to use Spark's MLlib library with Elephas, you create an RDD of LabeledPoints for +supervised training as follows ```python from elephas.utils.rdd_utils import to_labeled_point -lp_rdd = to_labeled_point(sc, X_train, Y_train, categorical=True) +lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True) ``` Training a given LabeledPoint-RDD is very similar to what we've seen already ```python from elephas.spark_model import SparkMLlibModel -adadelta = elephas_optimizers.Adadelta() -spark_model = SparkMLlibModel(sc,model, optimizer=adadelta, frequency='batch', mode='hogwild', num_workers=2) -spark_model.train(lp_rdd, nb_epoch=20, batch_size=32, verbose=0, validation_split=0.1, categorical=True, nb_classes=nb_classes) +spark_model = SparkMLlibModel(model, frequency='batch', mode='hogwild') +spark_model.train(lp_rdd, epochs=20, batch_size=32, verbose=0, validation_split=0.1, + categorical=True, nb_classes=nb_classes) ``` -### Spark ML example + +## Spark ML integration + To train a model with a SparkML estimator on a data frame, use the following syntax. 
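A note on names: the snippet below uses `to_data_frame`, `ElephasEstimator` and `MulticlassMetrics` without repeating their imports, and it reuses the SparkContext `sc` plus the `epochs` and `batch_size` values from above. Assuming the same layout as the project's test suite (`tests/test_ml_model.py`), the imports would be:

```python
# Imports assumed by the Spark ML example (they mirror tests/test_ml_model.py);
# `sc`, `model`, `epochs` and `batch_size` are taken from the sections above.
from elephas.ml_model import ElephasEstimator
from elephas.ml.adapter import to_data_frame
from pyspark.mllib.evaluation import MulticlassMetrics
```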
```python -df = to_data_frame(sc, X_train, Y_train, categorical=True) -test_df = to_data_frame(sc, X_test, Y_test, categorical=True) - -adadelta = elephas_optimizers.Adadelta() -estimator = ElephasEstimator(sc,model, - nb_epoch=nb_epoch, batch_size=batch_size, optimizer=adadelta, frequency='batch', mode='asynchronous', num_workers=2, - verbose=0, validation_split=0.1, categorical=True, nb_classes=nb_classes) +df = to_data_frame(sc, x_train, y_train, categorical=True) +test_df = to_data_frame(sc, x_test, y_test, categorical=True) +estimator = ElephasEstimator(model, epochs=epochs, batch_size=batch_size, frequency='batch', mode='asynchronous', + categorical=True, nb_classes=nb_classes) fitted_model = estimator.fit(df) ``` @@ -184,74 +146,12 @@ prediction = fitted_model.transform(test_df) pnl = prediction.select("label", "prediction") pnl.show(100) -prediction_and_label= pnl.map(lambda row: (row.label, row.prediction)) +prediction_and_label= pnl.rdd.map(lambda row: (row.label, row.prediction)) metrics = MulticlassMetrics(prediction_and_label) print(metrics.precision()) print(metrics.recall()) ``` -## Usage of data-parallel models - -In the first example above we have seen that an elephas model is instantiated like this - -```python -spark_model = SparkModel(sc,model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=2) -``` -So, apart from the canonical Spark context and Keras model, Elephas models have four parameters to tune and -we will describe each of them next. - -### Model updates (optimizers) - -`optimizer`: The optimizers module in elephas is an adaption of the same module in keras, i.e. it provides the -user with the following list of optimizers: - -- `SGD` -- `RMSprop` -- `Adagrad` -- `Adadelta` -- `Adam` - -Once constructed, each of these can be passed to the *optimizer* parameter of the model. Updates in keras are -computed with the help of theano, so most of the data structures in keras optimizers stem from theano. In -elephas, gradients have already been computed by the respective workers, so it makes sense to entirely work -with numpy arrays internally. - -Note that in order to set up an elephas model, you have to specify two optimizers, one for elephas and one for the -underlying keras model. Individual workers produce updates according to keras optimizers and the "master" model on the -driver uses elephas optimizers to aggregate them. For starters, we recommend keras models with SGD and elephas models -with Adagrad or Adadelta. - -### Update frequency - -`frequency`: The user can decide how often updates are passed to the master model by controlling the *frequency* -parameter. To update every batch, choose 'batch' and to update only after every epoch, choose 'epoch'. - -### Update mode - -`mode`: Currently, there's three different modes available in elephas, each corresponding to a different heuristic or -parallelization scheme adopted, which is controlled by the *mode* parameter. The default property is 'asynchronous'. - -#### Asynchronous updates with read and write locks (`mode='asynchronous'`) - -This mode implements the algorithm described as *downpour* in [1], i.e. each worker can send updates whenever they -are ready. The master model makes sure that no update gets lost, i.e. multiple updates get applied at the "same" time, -by locking the master parameters while reading and writing parameters. This idea has been used in Google's DistBelief -framework. 
- -#### Asynchronous updates without locks (`mode='hogwild'`) -Essentially the same procedure as above, but without requiring the locks. This heuristic assumes that we still fare -well enough, even if we loose an update here or there. Updating parameters lock-free in a non-distributed setting -for SGD goes by the name 'Hogwild!' [2], it's distributed extension is called 'Dogwild!' [3]. - -#### Synchronous updates (`mode='synchronous'`) - -In this mode each worker sends a new batch of parameter updates at the same time, which are then processed on the -master. Accordingly, this algorithm is sometimes called *batch synchronous parallel* or just BSP. - -### Degree of parallelization (number of workers) - -`num_workers`: Lastly, the degree to which we parallelize our training data is controlled by the -parameter *num_workers*. ## Distributed hyper-parameter optimization @@ -267,43 +167,26 @@ this works. ```python from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe +from hyperopt import STATUS_OK from hyperas.distributions import choice, uniform def data(): - ''' - Data providing function: - - Make sure to have every relevant import statement included here and return data as - used in model function below. This function is separated from model() so that hyperopt - won't reload data for each evaluation run. - ''' from keras.datasets import mnist from keras.utils import np_utils - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 + (x_train, y_train), (x_test, y_test) = mnist.load_data() + x_train = x_train.reshape(60000, 784) + x_test = x_test.reshape(10000, 784) + x_train = x_train.astype('float32') + x_test = x_test.astype('float32') + x_train /= 255 + x_test /= 255 nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - -def model(X_train, Y_train, X_test, Y_test): - ''' - Model providing function: - - Create Keras model with double curly brackets dropped-in as needed. - Return value has to be a valid python dictionary with two customary keys: - - loss: Specify a numeric evaluation metric to be minimized - - status: Just use STATUS_OK and see hyperopt documentation if not feasible - The last one is optional, though recommended, namely: - - model: specify the model just created so that we can later use it again. 
- ''' + y_train = np_utils.to_categorical(y_train, nb_classes) + y_test = np_utils.to_categorical(y_test, nb_classes) + return x_train, y_train, x_test, y_test + + +def model(x_train, y_train, x_test, y_test): from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation from keras.optimizers import RMSprop @@ -321,21 +204,20 @@ def model(X_train, Y_train, X_test, Y_test): rms = RMSprop() model.compile(loss='categorical_crossentropy', optimizer=rms) - model.fit(X_train, Y_train, + model.fit(x_train, y_train, batch_size={{choice([64, 128])}}, nb_epoch=1, show_accuracy=True, verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) + validation_data=(x_test, y_test)) + score, acc = model.evaluate(x_test, y_test, show_accuracy=True, verbose=0) print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml(), 'weights': pickle.dumps(model.get_weights())} + return {'loss': -acc, 'status': STATUS_OK, 'model': model.to_yaml()} ``` Once the basic setup is defined, running the minimization is done in just a few lines of code: ```python -from hyperas import optim from elephas.hyperparam import HyperParamModel from pyspark import SparkContext, SparkConf From 44388b57e70701aa8237063233deb8fe1ca41ca6 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 14:43:15 +0200 Subject: [PATCH 55/57] travis --- .travis.yml | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0a7237b..a91b9c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ dist: trusty language: python python: - "2.7" - # - "3.4" # Note that hyperopt currently seems to have issues with 3.4 + - "3.4" # Note that hyperopt currently seems to have issues with 3.4 install: # code below is taken from http://conda.pydata.org/docs/travis.html # We do this conditionally because it saves us some downloading if the @@ -21,20 +21,11 @@ install: - conda info -a - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py flask - source activate test-environment - - pip install pytest-cov python-coveralls - - pip install git+git://github.com/Theano/Theano.git - - pip install keras - - python setup.py install + - pip install pytest pytest-cov python-coveralls + - pip install -r requirements.txt - # Install Spark - - wget http://apache.mirrors.tds.net/spark/spark-1.5.2/spark-1.5.2-bin-hadoop2.6.tgz -P $HOME - - tar zxvf $HOME/spark-* -C $HOME - - export SPARK_HOME=$HOME/spark-1.5.2-bin-hadoop2.6 - - export PATH=$PATH:$SPARK_HOME/bin - -# Just run an example for now script: - python -c "import keras.backend" - - spark-submit --driver-memory 2G $PWD/examples/mnist_mlp_spark.py + - py.test tests/ after_success: - coveralls From 8d917c8a8b332a3e1037ed16142d5bba9442d3a7 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 14:50:46 +0200 Subject: [PATCH 56/57] travis --- .travis.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index a91b9c6..74586cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ dist: trusty language: python python: - "2.7" - - "3.4" # Note that hyperopt currently seems to have issues with 3.4 + #- "3.4" # Note that hyperopt currently seems to have issues with 3.4 install: # code below is taken from http://conda.pydata.org/docs/travis.html # We do this conditionally because it saves us some downloading if the 
@@ -19,10 +19,11 @@ install: - conda config --set always_yes yes --set changeps1 no - conda update -q conda - conda info -a - - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py flask + - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy pytest h5py flask - source activate test-environment - - pip install pytest pytest-cov python-coveralls + - pip install pytest-cov python-coveralls - pip install -r requirements.txt + - python setup.py install script: - python -c "import keras.backend" From 24b544a8457fa966302d490331c870d1f5d4586b Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Wed, 15 Aug 2018 15:00:59 +0200 Subject: [PATCH 57/57] travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 74586cb..638bb9b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,8 +22,8 @@ install: - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy pytest h5py flask - source activate test-environment - pip install pytest-cov python-coveralls - - pip install -r requirements.txt - python setup.py install + - pip install -r requirements.txt script: - python -c "import keras.backend"