Replace hadoop minicluster tests with testcontainers (#3082)
* Fix CI continue on test failure
* Increase timeout in postgis test to reduce CI failures
elahrvivaz committed Mar 25, 2024
1 parent 46efa9c commit 4ae4192
Showing 14 changed files with 142 additions and 104 deletions.
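In effect, the commit replaces the per-suite, in-process Hadoop minicluster (MiniDFSCluster) with a single Docker-based Hadoop container shared across the whole test JVM: a new HadoopSharedCluster singleton manages the container, suites isolate themselves under per-class HDFS paths, and downstream modules reuse the helper through a new geomesa-fs-datastore test-jar. The workflow changes are separate CI fixes to the unit-test retry steps.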
7 changes: 4 additions & 3 deletions .github/workflows/build-and-test-2.12.yml
@@ -28,23 +28,24 @@ jobs:
         run: ./build/mvn clean install $MAVEN_CLI_OPTS -DskipTests -T4
       - name: Unit tests
         id: test
+        continue-on-error: true
         run: |
           set -o pipefail
           mvn surefire:test $MAVEN_CLI_OPTS $MAVEN_TEST_OPTS | tee -a test.log
-        continue-on-error: true
      - name: Unit tests (retry)
         id: test-retry
         if: steps.test.outcome=='failure'
+        continue-on-error: true
         run: |
           set -o pipefail
-          RESUME_FROM="$(tail -n2 test.log | grep 'rf' | sed 's/.*-rf/-rf/')"
+          RESUME_FROM="$(grep --text 'mvn <args> -rf ' test.log | tail -n1 | sed 's/.*-rf/-rf/')"
           mvn surefire:test $MAVEN_CLI_OPTS $MAVEN_TEST_OPTS $RESUME_FROM | tee -a test.log
       - name: Unit tests (retry)
         id: test-retry-retry
         if: steps.test-retry.outcome=='failure'
         run: |
           set -o pipefail
-          RESUME_FROM="$(tail -n2 test.log | grep 'rf' | sed 's/.*-rf/-rf/')"
+          RESUME_FROM="$(grep --text 'mvn <args> -rf ' test.log | tail -n1 | sed 's/.*-rf/-rf/')"
           mvn surefire:test $MAVEN_CLI_OPTS $MAVEN_TEST_OPTS $RESUME_FROM | tee -a test.log
       - name: Remove geomesa artifacts
         if: success() || failure()
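Two distinct fixes land in this hunk. First, `continue-on-error: true` is added to the first retry step (and moved above `run:` on the initial step), so a failed first retry no longer fails the job before the second retry can run; the final attempt deliberately omits it, so persistent failures still fail the build. Second, the resume point is now found by searching the whole log for Maven's literal resume hint instead of inspecting only the last two lines, which missed the hint whenever other output followed it. A rough Scala restatement of what the new pipeline computes (a hypothetical helper, for illustration only; the 2.13 workflow below receives the identical change):

```scala
object ResumeFrom {
  // mirrors: grep --text 'mvn <args> -rf ' test.log | tail -n1 | sed 's/.*-rf/-rf/'
  def apply(logLines: Seq[String]): Option[String] =
    logLines
      .filter(_.contains("mvn <args> -rf "))       // every resume hint Maven printed
      .lastOption                                  // only the most recent one matters
      .map(l => l.substring(l.lastIndexOf("-rf"))) // drop everything before the final "-rf"
}
```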
7 changes: 4 additions & 3 deletions .github/workflows/build-and-test-2.13.yml
@@ -30,23 +30,24 @@ jobs:
         run: ./build/mvn clean install $MAVEN_CLI_OPTS -DskipTests -T4
       - name: Unit tests
         id: test
+        continue-on-error: true
         run: |
           set -o pipefail
           mvn surefire:test $MAVEN_CLI_OPTS $MAVEN_TEST_OPTS | tee -a test.log
-        continue-on-error: true
      - name: Unit tests (retry)
         id: test-retry
         if: steps.test.outcome=='failure'
+        continue-on-error: true
         run: |
           set -o pipefail
-          RESUME_FROM="$(tail -n2 test.log | grep 'rf' | sed 's/.*-rf/-rf/')"
+          RESUME_FROM="$(grep --text 'mvn <args> -rf ' test.log | tail -n1 | sed 's/.*-rf/-rf/')"
           mvn surefire:test $MAVEN_CLI_OPTS $MAVEN_TEST_OPTS $RESUME_FROM | tee -a test.log
       - name: Unit tests (retry)
         id: test-retry-retry
         if: steps.test-retry.outcome=='failure'
         run: |
           set -o pipefail
-          RESUME_FROM="$(tail -n2 test.log | grep 'rf' | sed 's/.*-rf/-rf/')"
+          RESUME_FROM="$(grep --text 'mvn <args> -rf ' test.log | tail -n1 | sed 's/.*-rf/-rf/')"
           mvn surefire:test $MAVEN_CLI_OPTS $MAVEN_TEST_OPTS $RESUME_FROM | tee -a test.log
       - name: Remove geomesa artifacts
         if: success() || failure()
5 changes: 2 additions & 3 deletions build/cqs.tsv
@@ -275,8 +275,8 @@ org.apache.hadoop:hadoop-client 3.3.6 provided
 org.apache.hadoop:hadoop-common 3.3.6 provided
 org.apache.hadoop:hadoop-distcp 3.3.6 provided
 org.apache.hadoop:hadoop-hdfs 3.3.6 provided
-org.apache.hadoop:hadoop-mapreduce-client-common 3.3.6 provided
 org.apache.hadoop:hadoop-mapreduce-client-core 3.3.6 provided
+org.apache.hadoop:hadoop-mapreduce-client-jobclient 3.3.6 provided
 org.apache.hadoop:hadoop-yarn-api 3.3.6 provided
 org.apache.hadoop:hadoop-yarn-common 3.3.6 provided
 org.apache.hbase:hbase-server 2.5.7-hadoop3 provided
@@ -302,7 +302,6 @@ org.apache.arrow:arrow-vector tests:15.0.2 test
 org.apache.cassandra:cassandra-all 3.11.14 test
 org.apache.cassandra:cassandra-thrift 3.11.14 test
 org.apache.curator:curator-test 5.6.0 test
-org.apache.hadoop:hadoop-minicluster 3.3.6 test
 org.apache.hbase:hbase-testing-util 2.5.7-hadoop3 test
 org.apache.kafka:kafka-clients test:3.7.0 test
 org.apache.kafka:kafka-streams-test-utils 3.7.0 test
@@ -311,7 +310,7 @@ org.apache.logging.log4j:log4j-core 2.22.1 test
 org.apache.sedona:sedona-common 1.5.0 test
 org.cassandraunit:cassandra-unit 3.7.1.0 test
 org.codehaus.groovy:groovy-jsr223 3.0.20 test
-org.geomesa.testcontainers:testcontainers-accumulo 1.1.0 test
+org.geomesa.testcontainers:testcontainers-accumulo 1.3.0 test
 org.geotools:gt-epsg-hsql 30.2 test
 org.jruby:jruby 9.4.5.0 test
 org.mockito:mockito-core 2.28.2 test
14 changes: 0 additions & 14 deletions geomesa-accumulo/geomesa-accumulo-jobs/pom.xml
@@ -82,20 +82,6 @@
       <groupId>org.geomesa.testcontainers</groupId>
       <artifactId>testcontainers-accumulo</artifactId>
     </dependency>
-    <dependency>
-      <!-- 'works with' due to license issues -->
-      <groupId>org.xerial.snappy</groupId>
-      <artifactId>snappy-java</artifactId>
-      <version>${snappy.java.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <!-- used by hadoop-minicluster -->
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-core</artifactId>
-      <version>${hadoop.minicluster.mockito.version}</version>
-      <scope>test</scope>
-    </dependency>
   </dependencies>

 </project>
26 changes: 26 additions & 0 deletions geomesa-fs/geomesa-fs-datastore/pom.xml
@@ -64,6 +64,7 @@
       <artifactId>hadoop-mapreduce-client-core</artifactId>
     </dependency>

+    <!-- test dependencies -->
     <dependency>
       <groupId>org.specs2</groupId>
       <artifactId>specs2-core_${scala.binary.version}</artifactId>
@@ -82,6 +83,31 @@
       <artifactId>geomesa-fs-storage-orc_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
+
+    <dependency>
+      <groupId>org.testcontainers</groupId>
+      <artifactId>testcontainers</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.geomesa.testcontainers</groupId>
+      <artifactId>testcontainers-accumulo</artifactId>
+    </dependency>
   </dependencies>

+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
 </project>
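The maven-jar-plugin test-jar execution added here is what lets the spark-runtime and tools modules later in this diff depend on this module with `<classifier>tests</classifier>` and so share the HadoopSharedCluster helper defined next.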
59 changes: 59 additions & 0 deletions geomesa-fs/geomesa-fs-datastore/src/test/scala/org/locationtech/geomesa/fs/HadoopSharedCluster.scala
@@ -0,0 +1,59 @@
+/***********************************************************************
+ * Copyright (c) 2013-2024 Commonwealth Computer Research, Inc.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Apache License, Version 2.0
+ * which accompanies this distribution and is available at
+ * http://www.opensource.org/licenses/apache2.0.php.
+ ***********************************************************************/
+
+package org.locationtech.geomesa.fs
+
+import com.typesafe.scalalogging.StrictLogging
+import org.apache.hadoop.conf.Configuration
+import org.geomesa.testcontainers.HadoopContainer
+import org.testcontainers.utility.DockerImageName
+
+import java.io.{ByteArrayInputStream, StringWriter}
+import java.nio.charset.StandardCharsets
+import java.util.concurrent.atomic.AtomicBoolean
+import scala.util.Try
+
+/**
+ * Hadoop cluster for testing. Singleton object that is shared between all test classes in the jvm.
+ */
+object HadoopSharedCluster extends StrictLogging {
+
+  val ImageName =
+    DockerImageName.parse("ghcr.io/geomesa/accumulo-uno")
+        .withTag(sys.props.getOrElse("accumulo.docker.tag", "2.1.2"))
+
+  lazy val Container: HadoopContainer = tryContainer.get
+
+  lazy val ContainerConfig: String = {
+    val conf = new Configuration(false)
+    conf.addResource(new ByteArrayInputStream(Container.getConfigurationXml.getBytes(StandardCharsets.UTF_8)), "")
+    conf.set("parquet.compression", "GZIP", "") // default is snappy which is not on our classpath
+    val writer = new StringWriter()
+    conf.writeXml(writer)
+    writer.toString
+  }
+
+  private lazy val tryContainer: Try[HadoopContainer] = Try {
+    logger.info("Starting Hadoop container")
+    val container = new HadoopContainer(ImageName)
+    initialized.getAndSet(true)
+    container.start()
+    logger.info("Started Hadoop container")
+    container
+  }
+
+  private val initialized = new AtomicBoolean(false)
+
+  sys.addShutdownHook({
+    if (initialized.get) {
+      logger.info("Stopping Hadoop container")
+      tryContainer.foreach(_.stop())
+      logger.info("Stopped Hadoop container")
+    }
+  })
+}
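A minimal sketch of how a suite consumes the shared container. It assumes only the members defined above plus the getHdfsUrl accessor used by FileSystemRDDProviderTest later in this diff; the class name and assertion are hypothetical:

```scala
import org.geotools.api.data.DataStoreFinder
import org.locationtech.geomesa.fs.HadoopSharedCluster
import org.specs2.mutable.Specification

import scala.collection.JavaConverters._

class SharedClusterUsageTest extends Specification {

  // each suite writes under its own directory in the shared HDFS instance,
  // so suites stay isolated without owning the cluster
  lazy val path = s"${HadoopSharedCluster.Container.getHdfsUrl}/${getClass.getSimpleName}/"

  // touching Container lazily starts the one Hadoop container for this JVM;
  // the shutdown hook in HadoopSharedCluster stops it when the JVM exits
  lazy val ds = DataStoreFinder.getDataStore(Map("fs.path" -> path).asJava)

  "FileSystemDataStore" should {
    "be available via the shared cluster" in {
      ds must not(beNull)
    }
  }

  step {
    ds.dispose()
  }
}
```

Because Container is lazy and stopped from a shutdown hook, the first suite to touch it pays the startup cost and every later suite in the JVM reuses the same instance.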
17 changes: 9 additions & 8 deletions geomesa-fs/geomesa-fs-spark-runtime/pom.xml
@@ -93,16 +93,17 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-minicluster</artifactId>
-      <scope>test</scope>
+      <groupId>org.locationtech.geomesa</groupId>
+      <artifactId>geomesa-fs-datastore_${scala.binary.version}</artifactId>
+      <classifier>tests</classifier>
     </dependency>
-    <!-- used by hadoop-minicluster -->
     <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-core</artifactId>
-      <version>${hadoop.minicluster.mockito.version}</version>
-      <scope>test</scope>
+      <groupId>org.testcontainers</groupId>
+      <artifactId>testcontainers</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.geomesa.testcontainers</groupId>
+      <artifactId>testcontainers-accumulo</artifactId>
     </dependency>
   </dependencies>

22 changes: 3 additions & 19 deletions geomesa-fs/geomesa-fs-spark-runtime/src/test/scala/org/locationtech/geomesa/fs/spark/FileSystemRDDProviderTest.scala
@@ -9,13 +9,12 @@
 package org.locationtech.geomesa.fs.spark

 import com.typesafe.scalalogging.LazyLogging
-import org.apache.commons.io.FileUtils
-import org.apache.hadoop.hdfs.{HdfsConfiguration, MiniDFSCluster}
 import org.apache.spark.sql.{SQLContext, SparkSession}
 import org.geotools.api.data.{DataStore, DataStoreFinder, Transaction}
 import org.geotools.filter.text.ecql.ECQL
 import org.junit.runner.RunWith
 import org.locationtech.geomesa.features.ScalaSimpleFeature
+import org.locationtech.geomesa.fs.HadoopSharedCluster
 import org.locationtech.geomesa.spark.SparkSQLTestUtils
 import org.locationtech.geomesa.spark.sql.SQLTypes
 import org.locationtech.geomesa.utils.geotools.{FeatureUtils, SimpleFeatureTypes}
@@ -24,8 +23,6 @@ import org.locationtech.geomesa.utils.text.WKTUtils
 import org.specs2.mutable.Specification
 import org.specs2.runner.JUnitRunner

-import java.nio.file.{Files, Path}
-

 @RunWith(classOf[JUnitRunner])
 class FileSystemRDDProviderTest extends Specification with LazyLogging {
@@ -36,26 +33,16 @@ class FileSystemRDDProviderTest extends Specification with LazyLogging {

   sequential

-  val tempDir: Path = Files.createTempDirectory("fsSparkTest")
-
-  var cluster: MiniDFSCluster = _
-  var directory: String = _
-
   var spark: SparkSession = _
   var sc: SQLContext = _

-  lazy val params = Map("fs.path" -> directory)
+  lazy val path = s"${HadoopSharedCluster.Container.getHdfsUrl}/${getClass.getSimpleName}/"
+  lazy val params = Map("fs.path" -> path)
   lazy val ds: DataStore = DataStoreFinder.getDataStore(params.asJava)

   val formats = Seq("orc", "parquet")

   step {
-    // Start MiniCluster
-    val conf = new HdfsConfiguration()
-    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tempDir.toFile.getAbsolutePath)
-    cluster = new MiniDFSCluster.Builder(conf).build()
-    directory = cluster.getURI + "/data/chicago"
-
     formats.foreach { format =>
       val sft = SimpleFeatureTypes.createType(format,
         "arrest:String,case_number:Int:index=full:cardinality=high,dtg:Date,*geom:Point:srid=4326")
@@ -216,8 +203,5 @@

   step {
     ds.dispose()
-    // Stop MiniCluster
-    cluster.shutdown()
-    FileUtils.deleteDirectory(tempDir.toFile)
   }
 }
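FileSystemRDDProviderTest only needs an HDFS path, but a test that needs full Hadoop settings could rebuild a Configuration from ContainerConfig the same way HadoopSharedCluster itself constructs it; a sketch under that assumption:

```scala
import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

import org.apache.hadoop.conf.Configuration
import org.locationtech.geomesa.fs.HadoopSharedCluster

object SharedClusterConf {
  // rebuild a Configuration from the shared cluster's serialized XML,
  // mirroring the parsing done in HadoopSharedCluster.ContainerConfig
  lazy val conf: Configuration = {
    val c = new Configuration(false)
    c.addResource(
      new ByteArrayInputStream(HadoopSharedCluster.ContainerConfig.getBytes(StandardCharsets.UTF_8)),
      "shared-cluster")
    c
  }
}
```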
20 changes: 11 additions & 9 deletions geomesa-fs/geomesa-fs-tools/pom.xml
@@ -47,6 +47,10 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-mapreduce-client-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+    </dependency>

     <dependency>
       <groupId>org.slf4j</groupId>
@@ -69,19 +73,17 @@
       <artifactId>specs2-junit_${scala.binary.version}</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-mapreduce-client-common</artifactId>
+      <groupId>org.locationtech.geomesa</groupId>
+      <artifactId>geomesa-fs-datastore_${scala.binary.version}</artifactId>
+      <classifier>tests</classifier>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-minicluster</artifactId>
+      <groupId>org.testcontainers</groupId>
+      <artifactId>testcontainers</artifactId>
     </dependency>
-    <!-- used by hadoop-minicluster -->
     <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-core</artifactId>
-      <version>${hadoop.minicluster.mockito.version}</version>
-      <scope>test</scope>
+      <groupId>org.geomesa.testcontainers</groupId>
+      <artifactId>testcontainers-accumulo</artifactId>
     </dependency>
   </dependencies>

