Skip to content

Commit

Permalink
Merge pull request #200 from jbouffard/serializers
Browse files Browse the repository at this point in the history
Added Kryo Registrator to SparkConf
  • Loading branch information
Jacob Bouffard committed May 15, 2017
2 parents 2370c60 + 44d70b5 commit e3f0b5f
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 2 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ ${WHEEL}: ${DIST-ASSEMBLY} $(call rwildcard, geopyspark, *.py) setup.py
wheel: ${WHEEL}

pyspark: ${DIST-ASSEMBLY}
pyspark --jars ${DIST-ASSEMBLY}
# Start pyspark with the assembly jar, the Kryo serializer, and the GeoTrellis
# Kryo registrator class. The property key is spelled "kryo".
pyspark --jars ${DIST-ASSEMBLY} \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryo.registrator=geotrellis.spark.io.kryo.KryoRegistrator

docker/archives/${ASSEMBLYNAME}: ${DIST-ASSEMBLY}
cp -f ${DIST-ASSEMBLY} docker/archives/${ASSEMBLYNAME}
Expand Down
2 changes: 1 addition & 1 deletion docker/kernels/local/kernel.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"PYSPARK_PYTHON": "/usr/bin/python3.4",
"SPARK_HOME": "/usr/local/spark-2.1.0-bin-hadoop2.7",
"PYTHONPATH": "/usr/local/spark-2.1.0-bin-hadoop2.7/python/lib/pyspark.zip:/usr/local/spark-2.1.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip",
"PYSPARK_SUBMIT_ARGS": "--master local --archives /blobs/gdal-and-friends.tar.gz,/blobs/geopyspark-and-friends.tar.gz --jars /blobs/geotrellis-backend-assembly-0.1.0.jar --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.executorEnv.LD_LIBRARY_PATH=gdal-and-friends.tar.gz/lib/ --conf spark.executorEnv.PYTHONPATH=geopyspark-and-friends.tar.gz/ pyspark-shell"
"PYSPARK_SUBMIT_ARGS": "--master local --archives /blobs/gdal-and-friends.tar.gz,/blobs/geopyspark-and-friends.tar.gz --jars /blobs/geotrellis-backend-assembly-0.1.0.jar --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.kryo.registrator=geotrellis.spark.io.kryo.KryoRegistrator --conf spark.executorEnv.LD_LIBRARY_PATH=gdal-and-friends.tar.gz/lib/ --conf spark.executorEnv.PYTHONPATH=geopyspark-and-friends.tar.gz/ pyspark-shell"
},
"language": "python",
"display_name": "PySpark (local)"
Expand Down
2 changes: 2 additions & 0 deletions geopyspark/geopyspark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ def setup_environment():
# Submit arguments for the PySpark shell: pass the jars, disable the Spark UI,
# select the Kryo serializer with the GeoTrellis Kryo registrator class, and
# set driver and executor memory to 2G. The property key is spelled "kryo".
os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars {} \
--conf spark.ui.enabled=false \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryo.registrator=geotrellis.spark.io.kryo.KryoRegistrator \
--driver-memory 2G \
--executor-memory 2G \
pyspark-shell".format(jar_string)
else:
# Submit arguments for the PySpark shell: pass the jars, disable the Spark UI,
# select the Kryo serializer with the GeoTrellis Kryo registrator class, and
# set driver and executor memory to 8G. The property key is spelled "kryo".
os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars {} \
--conf spark.ui.enabled=false \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.kryo.registrator=geotrellis.spark.io.kryo.KryoRegistrator \
--driver-memory 8G \
--executor-memory 8G \
pyspark-shell".format(jar_string)

0 comments on commit e3f0b5f

Please sign in to comment.