From 8f962f3185111c871d1eaaa0bd7ff3b56ec4b82c Mon Sep 17 00:00:00 2001 From: Grigory Pomadchin Date: Thu, 6 Oct 2016 11:40:46 +0300 Subject: [PATCH] added de.javakaffe into geomesa subproject; required UnmodifiableCollection kryo serializer --- geomesa/build.sbt | 2 ++ .../GeometryToGeoMesaSimpleFeature.scala | 2 +- .../io/geomesa/GeoMesaFeatureReader.scala | 2 +- .../io/geomesa/GeoMesaFeatureWriter.scala | 3 +- .../io/geomesa/kryo/KryoRegistrator.scala | 14 ++++++++++ .../geotrellis/GeoMesaTestEnvironment.scala | 28 +++++++++++++++++++ .../geotools/GeoMesaSimpleFeatureType.scala | 2 +- .../io/geomesa/GeoMesaPersistenceSpec.scala | 5 ++-- 8 files changed, 51 insertions(+), 7 deletions(-) create mode 100644 geomesa/src/main/scala/geotrellis/spark/io/geomesa/kryo/KryoRegistrator.scala create mode 100644 geomesa/src/test/scala/geotrellis/GeoMesaTestEnvironment.scala diff --git a/geomesa/build.sbt b/geomesa/build.sbt index 225d4948c9..788dda634e 100644 --- a/geomesa/build.sbt +++ b/geomesa/build.sbt @@ -5,6 +5,8 @@ libraryDependencies ++= Seq( "org.locationtech.geomesa" % "geomesa-jobs" % Version.geomesa, "org.locationtech.geomesa" % "geomesa-accumulo-datastore" % Version.geomesa, "org.locationtech.geomesa" % "geomesa-utils" % Version.geomesa, + "de.javakaffee" % "kryo-serializers" % "0.38" exclude("com.esotericsoftware", "kryo"), + "com.esotericsoftware" % "kryo-shaded" % "3.0.3", "org.apache.spark" %% "spark-core" % Version.spark % "provided", spire, scalatest % "test") diff --git a/geomesa/src/main/scala/geotrellis/geomesa/geotools/GeometryToGeoMesaSimpleFeature.scala b/geomesa/src/main/scala/geotrellis/geomesa/geotools/GeometryToGeoMesaSimpleFeature.scala index a10a65b6ba..be6653cb05 100644 --- a/geomesa/src/main/scala/geotrellis/geomesa/geotools/GeometryToGeoMesaSimpleFeature.scala +++ b/geomesa/src/main/scala/geotrellis/geomesa/geotools/GeometryToGeoMesaSimpleFeature.scala @@ -15,7 +15,7 @@ object GeometryToGeoMesaSimpleFeature { val whenField = "when" val whereField = "where" - @transient lazy val featureTypeCache = + lazy val featureTypeCache = new LRUCache[String, SimpleFeatureType]( maxSize = ConfigFactory.load().getInt("geotrellis.geomesa.featureTypeCacheSize"), sizeOf = {x => 1l} diff --git a/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureReader.scala b/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureReader.scala index 8bcb243fd7..84a4259d71 100644 --- a/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureReader.scala +++ b/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureReader.scala @@ -24,7 +24,7 @@ class GeoMesaFeatureReader(val instance: GeoMesaInstance)(implicit sc: SparkCont numPartitions: Option[Int] = None ): RDD[SimpleFeature] = { val dataStore = instance.accumuloDataStore - dataStore.createSchema(simpleFeatureType) + if(!dataStore.getTypeNames().contains(simpleFeatureType.getTypeName)) dataStore.createSchema(simpleFeatureType) dataStore.dispose() val job = Job.getInstance(sc.hadoopConfiguration) diff --git a/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureWriter.scala b/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureWriter.scala index d8ac4f4e11..bb4c2b5ac1 100644 --- a/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureWriter.scala +++ b/geomesa/src/main/scala/geotrellis/spark/io/geomesa/GeoMesaFeatureWriter.scala @@ -9,13 +9,12 @@ import org.apache.spark.rdd.RDD import org.geotools.data.Transaction import org.opengis.feature.simple.{SimpleFeature, SimpleFeatureType} -import scala.collection.JavaConversions._ - class GeoMesaFeatureWriter(val instance: GeoMesaInstance)(implicit sc: SparkContext) extends Serializable { def write[G <: Geometry, D: ? => Seq[(String, Any)]] (layerId: LayerId, rdd: RDD[Feature[G, D]]) (implicit ev: Feature[G, D] => FeatureToGeoMesaSimpleFeatureMethods[G, D]): Unit = { + // SimpleFeatureType requires valid UnmodifiableCollection kryo serializer rdd .map { f => val sf = f.toSimpleFeature(layerId.name); sf.getFeatureType -> sf }.groupByKey .foreachPartition { (partition: Iterator[(SimpleFeatureType, Iterable[SimpleFeature])]) => diff --git a/geomesa/src/main/scala/geotrellis/spark/io/geomesa/kryo/KryoRegistrator.scala b/geomesa/src/main/scala/geotrellis/spark/io/geomesa/kryo/KryoRegistrator.scala new file mode 100644 index 0000000000..d007f19b06 --- /dev/null +++ b/geomesa/src/main/scala/geotrellis/spark/io/geomesa/kryo/KryoRegistrator.scala @@ -0,0 +1,14 @@ +package geotrellis.spark.io.geomesa.kryo + +import com.esotericsoftware.kryo.Kryo +import org.apache.spark.serializer.{KryoRegistrator => SparkKryoRegistrator} +import de.javakaffee.kryoserializers._ + +class KryoRegistrator extends SparkKryoRegistrator { + override def registerClasses(kryo: Kryo): Unit = { + new geotrellis.spark.io.kryo.KryoRegistrator().registerClasses(kryo) + + // SimpleFeatureType requires proper UnmodifiableCollection serializer + UnmodifiableCollectionsSerializer.registerSerializers(kryo) + } +} diff --git a/geomesa/src/test/scala/geotrellis/GeoMesaTestEnvironment.scala b/geomesa/src/test/scala/geotrellis/GeoMesaTestEnvironment.scala new file mode 100644 index 0000000000..2fe1ff9bbf --- /dev/null +++ b/geomesa/src/test/scala/geotrellis/GeoMesaTestEnvironment.scala @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2014 DigitalGlobe. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package geotrellis + +import geotrellis.spark.TestEnvironment + +import org.apache.spark.SparkConf +import org.scalatest._ + +trait GeoMesaTestEnvironment extends TestEnvironment { self: Suite => + override def setKryoRegistrator(conf: SparkConf) = + conf.set("spark.kryo.registrator", classOf[geotrellis.spark.io.geomesa.kryo.KryoRegistrator].getName) + .set("spark.kryo.registrationRequired", "false") +} diff --git a/geomesa/src/test/scala/geotrellis/geomesa/geotools/GeoMesaSimpleFeatureType.scala b/geomesa/src/test/scala/geotrellis/geomesa/geotools/GeoMesaSimpleFeatureType.scala index 72abb5f350..446f329b24 100644 --- a/geomesa/src/test/scala/geotrellis/geomesa/geotools/GeoMesaSimpleFeatureType.scala +++ b/geomesa/src/test/scala/geotrellis/geomesa/geotools/GeoMesaSimpleFeatureType.scala @@ -17,7 +17,7 @@ object GeoMesaSimpleFeatureType { val whenField = GeometryToGeoMesaSimpleFeature.whenField val whereField = GeometryToGeoMesaSimpleFeature.whereField - @transient lazy val featureTypeCache = + lazy val featureTypeCache = new LRUCache[String, SimpleFeatureType]( maxSize = ConfigFactory.load().getInt("geotrellis.geomesa.featureTypeCacheSize"), sizeOf = {x => 1l} diff --git a/geomesa/src/test/scala/geotrellis/spark/io/geomesa/GeoMesaPersistenceSpec.scala b/geomesa/src/test/scala/geotrellis/spark/io/geomesa/GeoMesaPersistenceSpec.scala index 271525ec20..cfcba7c224 100644 --- a/geomesa/src/test/scala/geotrellis/spark/io/geomesa/GeoMesaPersistenceSpec.scala +++ b/geomesa/src/test/scala/geotrellis/spark/io/geomesa/GeoMesaPersistenceSpec.scala @@ -3,7 +3,6 @@ package geotrellis.spark.io.geomesa import geotrellis.geomesa.geotools.{GeoMesaSimpleFeatureType, GeometryToGeoMesaSimpleFeature} import geotrellis.spark.{LayerId, TestEnvironment} import geotrellis.vector._ - import org.opengis.filter.Filter import org.apache.spark.rdd.RDD import org.geotools.data.Query @@ -12,7 +11,9 @@ import org.scalatest.{BeforeAndAfterAll, FunSpec, Matchers, Suite} import java.text.SimpleDateFormat import java.util.TimeZone -class GeoMesaPersistenceSpec extends FunSpec with Suite with BeforeAndAfterAll with Matchers with TestEnvironment { +import geotrellis.GeoMesaTestEnvironment + +class GeoMesaPersistenceSpec extends FunSpec with Suite with BeforeAndAfterAll with Matchers with GeoMesaTestEnvironment { describe("GeoMesa Features Spec") { val featuresInstance = GeoMesaInstance(