Process Process-1: OSError: [Errno 98] Address already in use #65

Closed
rayjang opened this issue Oct 23, 2017 · 1 comment

rayjang commented Oct 23, 2017

I tried to use elephas but got an error.
I'm using Keras 2.x, Python 3.5, and Spark 2.0.

adagrad = elephas_optimizers.Adagrad()
spark_model = SparkModel(sc, model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=4)
spark_model.train(rdd, nb_epoch=20, batch_size=32, verbose=0, validation_split=0.1)

This is the error I got:

Process Process-1:
Traceback (most recent call last):
File "/HOME/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/HOME/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/HOME/anaconda3/lib/python3.5/site-packages/elephas/spark_model.py", line 149, in start_service
threaded=True, use_reloader=False)
File "/HOME/anaconda3/lib/python3.5/site-packages/flask/app.py", line 843, in run
run_simple(host, port, self, **options)
File "/HOME/anaconda3/lib/python3.5/site-packages/werkzeug/serving.py", line 694, in run_simple
inner()
File "/HOME/anaconda3/lib/python3.5/site-packages/werkzeug/serving.py", line 656, in inner
fd=fd)
File "/HOME/anaconda3/lib/python3.5/site-packages/werkzeug/serving.py", line 544, in make_server
passthrough_errors, ssl_context, fd=fd)
File "/HOME/anaconda3/lib/python3.5/site-packages/werkzeug/serving.py", line 464, in init
HTTPServer.init(self, (host, int(port)), handler)
File "/HOME/anaconda3/lib/python3.5/socketserver.py", line 440, in init
self.server_bind()
File "/HOME/anaconda3/lib/python3.5/http/server.py", line 138, in server_bind
socketserver.TCPServer.server_bind(self)
File "/HOME/anaconda3/lib/python3.5/socketserver.py", line 454, in server_bind
self.socket.bind(self.server_address)
OSError: [Errno 98] Address already in use

Py4JJavaError Traceback (most recent call last)
<ipython-input> in <module>()
4 adagrad = elephas_optimizers.Adagrad()
5 spark_model = SparkModel(sc,model, optimizer=adagrad, frequency='epoch', mode='asynchronous', num_workers=4)
----> 6 spark_model.train(rdd, nb_epoch=20, batch_size=32, verbose=0, validation_split=0.1)

/HOME/anaconda3/lib/python3.5/site-packages/elephas/spark_model.py in train(self, rdd, nb_epoch, batch_size, verbose, validation_split)
170
171 if self.mode in ['asynchronous', 'synchronous', 'hogwild']:
--> 172 self._train(rdd, nb_epoch, batch_size, verbose, validation_split, master_url)
173 else:
174 print("""Choose from one of the modes: asynchronous, synchronous or hogwild""")

/HOME/anaconda3/lib/python3.5/site-packages/elephas/spark_model.py in _train(self, rdd, nb_epoch, batch_size, verbose, validation_split, master_url)
186 if self.mode in ['asynchronous', 'hogwild']:
187 worker = AsynchronousSparkWorker(yaml, train_config, self.frequency, master_url)
--> 188 rdd.mapPartitions(worker.train).collect()
189 new_parameters = get_server_weights(master_url)
190 elif self.mode == 'synchronous':

/HOME/NAME/spark-2.0.0-bin-hadoop2.7/python/pyspark/rdd.py in collect(self)
774 """
775 with SCCallSiteSync(self.context) as css:
--> 776 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
777 return list(_load_from_socket(port, self._jrdd_deserializer))
778

/HOME/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py in __call__(self, *args)
1158 answer = self.gateway_client.send_command(command)
1159 return_value = get_return_value(
-> 1160 answer, self.gateway_client, self.target_id, self.name)
1161
1162 for temp_arg in temp_args:

/HOME/anaconda3/lib/python3.5/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
318 raise Py4JJavaError(
319 "An error occurred while calling {0}{1}{2}.\n".
--> 320 format(target_id, ".", name), value)
321 else:
322 raise Py4JError(

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 1.0 failed 1 times, most recent failure: Lost task 3.0 in stage 1.0 (TID 11, localhost): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/HOME/rayjang/spark-2.0.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 172, in main
process()
File "/HOME/rayjang/spark-2.0.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 167, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/HOME/rayjang/spark-2.0.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/serializers.py", line 263, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/HOME/anaconda3/lib/python3.5/site-packages/elephas/spark_model.py", line 252, in train
model = model_from_yaml(self.yaml)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/models.py", line 329, in model_from_yaml
return layer_module.deserialize(config, custom_objects=custom_objects)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/layers/init.py", line 54, in deserialize
printable_module_name='layer')
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 139, in deserialize_keras_object
list(custom_objects.items())))
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 2487, in from_config
process_layer(layer_data)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 2473, in process_layer
custom_objects=custom_objects)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/layers/init.py", line 54, in deserialize
printable_module_name='layer')
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 141, in deserialize_keras_object
return cls.from_config(config['config'])
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 1252, in from_config
return cls(**config)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/layers/core.py", line 283, in init
self.activation = activations.get(activation)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/activations.py", line 95, in get
return deserialize(identifier)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/activations.py", line 87, in deserialize
printable_module_name='activation function')
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 159, in deserialize_keras_object
':' + function_name)
ValueError: Unknown activation function:relu6

at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:193)
at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:234)
at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:152)
at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:85)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1450)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1438)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1437)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1437)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1659)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1618)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1607)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1871)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1884)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1897)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1911)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:893)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
at org.apache.spark.rdd.RDD.collect(RDD.scala:892)
at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:453)
at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:211)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/HOME/rayjang/spark-2.0.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 172, in main
process()
File "/HOME/rayjang/spark-2.0.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/worker.py", line 167, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/HOME/rayjang/spark-2.0.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/serializers.py", line 263, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/HOME/anaconda3/lib/python3.5/site-packages/elephas/spark_model.py", line 252, in train
model = model_from_yaml(self.yaml)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/models.py", line 329, in model_from_yaml
return layer_module.deserialize(config, custom_objects=custom_objects)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/layers/init.py", line 54, in deserialize
printable_module_name='layer')
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 139, in deserialize_keras_object
list(custom_objects.items())))
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 2487, in from_config
process_layer(layer_data)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 2473, in process_layer
custom_objects=custom_objects)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/layers/init.py", line 54, in deserialize
printable_module_name='layer')
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 141, in deserialize_keras_object
return cls.from_config(config['config'])
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/engine/topology.py", line 1252, in from_config
return cls(**config)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/layers/core.py", line 283, in init
self.activation = activations.get(activation)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/activations.py", line 95, in get
return deserialize(identifier)
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/activations.py", line 87, in deserialize
printable_module_name='activation function')
File "/HOME/anaconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 159, in deserialize_keras_object
':' + function_name)
ValueError: Unknown activation function:relu6

at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:193)
at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:234)
at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:152)
at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:85)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more

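The worker-side failure buried in both traces is `ValueError: Unknown activation function:relu6`; the driver-side `OSError: [Errno 98] Address already in use` is likely secondary (a parameter-server process from an earlier run still holding its port). A minimal, untested sketch of one possible workaround, assuming a 2017-era Keras 2.0.x where `relu6` is defined in `keras.applications.mobilenet`, and assuming the global registration is visible wherever elephas calls `model_from_yaml` (not verified here):

```python
# Sketch of a possible workaround, not a verified fix: register 'relu6'
# globally so model_from_yaml() can resolve the name when the model is
# deserialized. Assumes Keras 2.0.x, where relu6 lives in
# keras.applications.mobilenet; otherwise fall back to a plain function.
from keras.utils.generic_utils import get_custom_objects

try:
    from keras.applications.mobilenet import relu6
except ImportError:
    from keras import backend as K

    def relu6(x):
        # ReLU capped at 6, matching MobileNet's activation
        return K.relu(x, max_value=6.)

# Make 'relu6' resolvable by name for activations.get()/deserialize()
get_custom_objects()['relu6'] = relu6
```

With `relu6` registered, the original `spark_model.train(...)` call stays unchanged.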
rayjang closed this as completed Oct 24, 2017

shan1322 commented Feb 3, 2020

Is there any solution to this problem? I killed a Spark process that was executing when I got this error.

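On the port side of the error: if an earlier (or killed) run left elephas' Flask parameter server running, its port stays bound and the next `start_service` call fails with Errno 98. A small sketch for checking that before retraining; the port number 4000 is an assumption here, so check the port actually used in your installed elephas `spark_model.py`:

```python
import socket

# Hypothetical helper: check whether the parameter-server port is still bound
# by a stale process from a previous/killed run. Port 4000 is an assumption;
# check the port actually passed to app.run() in your elephas spark_model.py.
def port_in_use(port, host='127.0.0.1'):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex((host, port)) == 0

if port_in_use(4000):
    print("Port 4000 is still bound -- find and kill the stale process "
          "(e.g. lsof -i :4000) before training again.")
else:
    print("Port 4000 is free.")
```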