Removing support for spark 1.x (#212)
* Removing support for spark 1.x

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>

* Upgrade gradle to 8.1.1

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>

* Fix kerberos gradle

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>

* Fix left over references from deserialization

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>

* Fix gradle issues while trying to distribute

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>

* Removing spark-13 workflow

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>

---------

Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>
harshavamsi committed May 3, 2023
1 parent 216aa8b commit 195bebf
Showing 82 changed files with 47 additions and 8,420 deletions.
47 changes: 0 additions & 47 deletions .github/workflows/build-spark-13.yml

This file was deleted.

9 changes: 0 additions & 9 deletions build.gradle
@@ -42,13 +42,4 @@ if (project.hasProperty("find-artifact")) {
}
}
}
}

// Add a task in the project that collects all the dependencyReport data for each project
// Concatenates the dependencies CSV files into a single file
// usage: ./gradlew :dist:generateDependenciesReport -Dcsv=/tmp/deps.csv
task generateDependenciesReport(type: ConcatFilesTask) {
files = fileTree(dir: project.rootDir, include: '**/dependencies.csv' )
headerLine = "name,version,url,license"
target = new File(System.getProperty('csv')?: "${project.buildDir}/reports/dependencies/opensearch-hadoop-dependencies.csv")
}
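
Note: the generateDependenciesReport task deleted from the root build.gradle here also exists in dist/build.gradle, where (further down in this diff) it is reworked to consume each project's DependenciesInfoTask output instead of scanning the source tree for dependencies.csv files.
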
4 changes: 0 additions & 4 deletions buildSrc/esh-version.properties~

This file was deleted.

2 changes: 1 addition & 1 deletion buildSrc/opensearch-hadoop-version.properties
@@ -1,4 +1,4 @@
opensearch_hadoop = 3.0.0
opensearch_hadoop = 1.0.0
opensearch = 3.0.0
lucene = 9.5.0-snapshot-a4ef70f
build-tools = 3.0.0
@@ -513,18 +513,12 @@ class BuildPlugin implements Plugin<Project> {
pack.dependsOn(project.tasks.jar)
pack.dependsOn(project.tasks.javadocJar)
pack.dependsOn(project.tasks.sourcesJar)
pack.outputs.files(project.tasks.jar.archivePath, project.tasks.javadocJar.archivePath, project.tasks.sourcesJar.archivePath)
project.getPlugins().withType(SparkVariantPlugin).whenPluginAdded {
SparkVariantPluginExtension sparkVariants = project.getExtensions().getByType(SparkVariantPluginExtension.class)
sparkVariants.featureVariants { SparkVariant variant ->
pack.dependsOn(project.tasks.getByName(variant.taskName('jar')))
pack.dependsOn(project.tasks.getByName(variant.taskName('javadocJar')))
pack.dependsOn(project.tasks.getByName(variant.taskName('sourcesJar')))
pack.outputs.files(
project.tasks.getByName(variant.taskName('jar')).archivePath,
project.tasks.getByName(variant.taskName('javadocJar')).archivePath,
project.tasks.getByName(variant.taskName('sourcesJar')).archivePath
)
}
}

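
The six lines removed in this hunk are the two pack.outputs.files(...) calls built on archivePath, an API that is deprecated in recent Gradle releases in favour of the Provider-based archiveFile; the dependsOn wiring for the main and Spark-variant jar, javadocJar, and sourcesJar tasks is kept.
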
@@ -128,7 +128,7 @@ class InstanceInfo {
* as well as a groovy AntBuilder, to enable running ant condition checks. The default wait
* condition is for http on the http port.
*/
Closure waitCondition = { InstanceInfo instanceInfo, AntBuilder ant ->
Closure waitCondition = { InstanceInfo instanceInfo, groovy.ant.AntBuilder ant ->
String waitUrl = instanceInfo.httpUri()
if (waitUrl == null) {
return true
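
The only change in this hunk is qualifying the closure parameter as groovy.ant.AntBuilder, presumably because the Groovy 3 runtime bundled with newer Gradle versions moved AntBuilder from groovy.util to groovy.ant, and an unqualified name can also resolve to Gradle's own deprecated AntBuilder type.
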
@@ -45,33 +45,27 @@
import java.util.LinkedHashSet;
import java.util.List;

import org.gradle.api.file.ConfigurableFileCollection;

/**
* Concatenates a list of files into one and removes duplicate lines.
*/
public class ConcatFilesTask extends DefaultTask {
public abstract class ConcatFilesTask extends DefaultTask {

public ConcatFilesTask() {
setDescription("Concat a list of files into one.");
}

/** List of files to concatenate */
private FileTree files;

/** line to add at the top of the target file */
private String headerLine;

private File target;

private List<String> additionalLines = new ArrayList<>();

public void setFiles(FileTree files) {
this.files = files;
}

@InputFiles
public FileTree getFiles() {
return files;
}
public abstract ConfigurableFileCollection getFiles();

public void setHeaderLine(String headerLine) {
this.headerLine = headerLine;
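
ConcatFilesTask now exposes its inputs as an abstract ConfigurableFileCollection managed property instead of an eagerly assigned FileTree field, so Gradle supplies the collection and callers append to it. A minimal sketch of how a build script would feed such a property (the task name, file locations, and the assumption that ConcatFilesTask is importable from buildSrc are illustrative, not taken from this commit):

    tasks.register('concatDependencyCsv', ConcatFilesTask) {
        // managed ConfigurableFileCollection properties are appended to with from(...), not assigned
        files.from(fileTree(dir: rootDir, include: '**/dependencies.csv'))
        headerLine = 'name,version,url,license'
        target = file("${buildDir}/reports/dependencies/all-dependencies.csv")
    }
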
8 changes: 7 additions & 1 deletion dist/build.gradle
@@ -179,9 +179,15 @@ distribution {

// Add a task in the root project that collects all the dependencyReport data for each project
// Concatenates the dependencies CSV files into a single file
task generateDependenciesReport(type: ConcatFilesTask) {
task generateDependenciesReport(type: ConcatFilesTask) { concatDepsTask ->
dependsOn rootProject.allprojects.collect { it.tasks.withType(DependenciesInfoTask) }
rootProject.allprojects.collect {
files = fileTree(dir: project.rootDir, include: '**/dependencies.csv' )
it.tasks.withType(DependenciesInfoTask) { depTask ->
concatDepsTask.dependsOn depTask
concatDepsTask.getFiles().from(depTask.outputFile)
}
}
headerLine = "name,version,url,license"
target = new File(System.getProperty('csv')?: "${project.buildDir}/reports/dependencies/opensearch-hadoop-dependencies.csv")
}
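
Per the usage comment on the removed root-level task, the concatenated report can still be produced with ./gradlew :dist:generateDependenciesReport -Dcsv=/tmp/deps.csv; only the way its input CSV files are collected changes here.
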
1 change: 0 additions & 1 deletion gradle.properties
@@ -24,7 +24,6 @@ jodaVersion = 2.9.3
jacksonVersion = 1.8.8

# Spark
spark13Version = 1.6.2
spark20Version = 2.3.0
spark22Version = 2.2.3
spark24Version = 2.4.4
Binary file modified gradle/wrapper/gradle-wrapper.jar
3 changes: 2 additions & 1 deletion gradle/wrapper/gradle-wrapper.properties
@@ -1,5 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.1-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
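
The wrapper jar, this properties file, and the gradlew/gradlew.bat scripts below are the stock files generated for the new Gradle version; they are normally regenerated together by the wrapper task (for example ./gradlew wrapper --gradle-version 8.1.1) rather than edited by hand.
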
12 changes: 8 additions & 4 deletions gradlew
@@ -55,7 +55,7 @@
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
@@ -80,10 +80,10 @@ do
esac
done

APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit

APP_NAME="Gradle"
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
@@ -143,12 +143,16 @@ fi
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
1 change: 1 addition & 0 deletions gradlew.bat
@@ -26,6 +26,7 @@ if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

34 changes: 17 additions & 17 deletions qa/kerberos/build.gradle
@@ -355,7 +355,7 @@ if (disableTests) {
// Run on namenode since the gateway is not yet set up
runOn(config.service('hadoop').role('namenode').instance(0))
dependsOn(jar)
jobJar = jar.archivePath
jobJar = jar.getArchiveFile().get().getAsFile()
jobClass = 'org.opensearch.hadoop.qa.kerberos.dfs.SecureFsShell'
systemProperties([
"test.krb5.principal": namenodePrincipal,
@@ -369,7 +369,7 @@ if (disableTests) {
// Run on namenode since the gateway is not yet set up
runOn(config.service('hadoop').role('namenode').instance(0))
dependsOn(jar, createTmp)
jobJar = jar.archivePath
jobJar = jar.getArchiveFile().get().getAsFile()
jobClass = 'org.opensearch.hadoop.qa.kerberos.dfs.SecureFsShell'
systemProperties([
"test.krb5.principal": namenodePrincipal,
@@ -387,7 +387,7 @@ if (disableTests) {
clusterConfiguration = config
executedOn = config.service('hadoop').role('namenode').instance(0)
dependsOn(jar)
jobJar = jar.archivePath
jobJar = jar.getArchiveFile().get().getAsFile()
jobClass = 'org.opensearch.hadoop.qa.kerberos.dfs.SecureFsShell'
systemProperties([
"test.krb5.principal": clientPrincipal,
@@ -402,7 +402,7 @@ if (disableTests) {
HadoopMRJob copyData = config.createClusterTask('copyData', HadoopMRJob.class) {
clusterConfiguration = config
dependsOn(createDataDir, jar)
jobJar = jar.archivePath
jobJar = jar.getArchiveFile().get().getAsFile()
jobClass = 'org.opensearch.hadoop.qa.kerberos.dfs.SecureFsShell'
systemProperties([
"test.krb5.principal": clientPrincipal,
@@ -421,8 +421,8 @@ if (disableTests) {
clusterConfiguration = config
useCluster(testClusters.integTest)
dependsOn(copyData, setupUsers)
jobJar = jar.archivePath
libJars(mrJar.archivePath, kerberosItestJar.archivePath)
jobJar = jar.getArchiveFile().get().getAsFile()
libJars(mrJar.getArchiveFile().get().getAsFile(), kerberosItestJar.getArchiveFile().get().getAsFile())
jobClass = 'org.opensearch.hadoop.qa.kerberos.mr.LoadToES'
jobSettings([
'opensearch.resource': 'qa_kerberos_mr_data',
@@ -448,8 +448,8 @@ if (disableTests) {
clusterConfiguration = config
useCluster(testClusters.integTest)
dependsOn(mrLoadData)
jobJar = jar.archivePath
libJars(mrJar.archivePath, kerberosItestJar.archivePath)
jobJar = jar.getArchiveFile().get().getAsFile()
libJars(mrJar.getArchiveFile().get().getAsFile(), kerberosItestJar.getArchiveFile().get().getAsFile())
jobClass = 'org.opensearch.hadoop.qa.kerberos.mr.ReadFromES'
jobSettings([
'opensearch.resource': 'qa_kerberos_mr_data',
@@ -481,8 +481,8 @@ if (disableTests) {
// deployModeCluster()
// principal = clientPrincipal + realm
// keytab = clientKeytab.toString()
jobJar = jar.archivePath
libJars(sparkJar.archivePath, kerberosItestJar.archivePath)
jobJar = jar.getArchiveFile().get().getAsFile()
libJars(sparkJar.getArchiveFile().get().getAsFile(), kerberosItestJar.getArchiveFile().get().getAsFile())
jobClass = 'org.opensearch.hadoop.qa.kerberos.spark.LoadToES'
jobSettings([
'spark.opensearch.resource': 'qa_kerberos_spark_data',
@@ -513,8 +513,8 @@ if (disableTests) {
// deployModeCluster()
// principal = clientPrincipal + realm
// keytab = clientKeytab.toString()
jobJar = jar.archivePath
libJars(sparkJar.archivePath, kerberosItestJar.archivePath)
jobJar = jar.getArchiveFile().get().getAsFile()
libJars(sparkJar.getArchiveFile().get().getAsFile(), kerberosItestJar.getArchiveFile().get().getAsFile())
jobClass = 'org.opensearch.hadoop.qa.kerberos.spark.ReadFromES'
jobSettings([
'spark.opensearch.resource': 'qa_kerberos_spark_data',
@@ -560,13 +560,13 @@ if (disableTests) {
dependsOn(jar, setupUsers, copyData, patchBeeline)
hivePrincipal = hivePrincipalName + realm
script = new File(resourceDir, 'hive/load_to_opensearch.sql')
libJars(hiveJar.archivePath, kerberosItestJar.archivePath)
libJars(hiveJar.getArchiveFile().get().getAsFile(), kerberosItestJar.getArchiveFile().get().getAsFile())
environmentVariables.putAll([
'HADOOP_CLIENT_OPTS':
"-Djava.security.krb5.conf=${krb5Conf.toString()} " +
"-Dtest.krb5.principal=$clientPrincipal$realm " +
"-Dtest.krb5.keytab=${clientKeytab.toString()} ",
'TEST_LIB': jar.archivePath.toString()
'TEST_LIB': jar.getArchiveFile().get().getAsFile().toString()
])
}
integrationTest.dependsOn(hiveLoadData)
@@ -577,13 +577,13 @@ if (disableTests) {
dependsOn(hiveLoadData)
hivePrincipal = hivePrincipalName + realm
script = new File(resourceDir, 'hive/read_from_opensearch.sql')
libJars(hiveJar.archivePath, kerberosItestJar.archivePath)
libJars(hiveJar.getArchiveFile().get().getAsFile(), kerberosItestJar.getArchiveFile().get().getAsFile())
environmentVariables.putAll([
'HADOOP_CLIENT_OPTS':
"-Djava.security.krb5.conf=${krb5Conf.toString()} " +
"-Dtest.krb5.principal=$clientPrincipal$realm " +
"-Dtest.krb5.keytab=${clientKeytab.toString()} ",
'TEST_LIB': jar.archivePath.toString()
'TEST_LIB': jar.getArchiveFile().get().getAsFile().toString()
])
}
integrationTest.dependsOn(hiveReadData)
@@ -617,7 +617,7 @@ if (disableTests) {
HadoopMRJob copyOutputTask = config.createClusterTask("copy${integrationName.capitalize()}Output".toString(), HadoopMRJob.class) {
clusterConfiguration = config
dependsOn(integrationReadTask, createOutputDataDir)
jobJar = jar.archivePath
jobJar = jar.getArchiveFile().get().getAsFile()
jobClass = 'org.opensearch.hadoop.qa.kerberos.dfs.SecureFsShell'
systemProperties([
"test.krb5.principal": clientPrincipal,
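
Throughout this file jar.archivePath is swapped for jar.getArchiveFile().get().getAsFile(): archivePath is deprecated in recent Gradle versions and archiveFile is its lazy, Provider-based replacement. A generic sketch of the pattern, with task and property names that are illustrative rather than taken from this build:

    // Resolve a jar's output location through the archiveFile provider
    def jarFile = tasks.named('jar', Jar).flatMap { it.archiveFile }

    tasks.register('printJarLocation') {
        inputs.file(jarFile)               // wiring stays lazy until execution
        doLast {
            println jarFile.get().asFile   // realized only when the task runs
        }
    }
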
6 changes: 1 addition & 5 deletions settings.gradle
@@ -1,6 +1,6 @@
pluginManagement {
plugins {
id 'com.github.johnrengelman.shadow' version "6.1.0"
id 'com.github.johnrengelman.shadow' version "8.1.1"
}
}

@@ -17,10 +17,6 @@ include 'spark-core'
project(":spark-core").projectDir = new File(settingsDir, "spark/core")
project(":spark-core").name = "opensearch-spark"

include 'sql-13'
project(":sql-13").projectDir = new File(settingsDir, "spark/sql-13")
project(":sql-13").name = "opensearch-spark-13"

include 'sql-20'
project(":sql-20").projectDir = new File(settingsDir, "spark/sql-20")
project(":sql-20").name = "opensearch-spark-20"
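
The Shadow plugin bump from 6.1.0 to 8.1.1 goes hand in hand with the Gradle upgrade, presumably because the 6.x plugin line predates Gradle 8 support; the sql-13 project entries for the Spark 1.x SQL module are dropped along with the rest of the 1.x build.
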
2 changes: 0 additions & 2 deletions spark/core/build.gradle
@@ -14,8 +14,6 @@ sparkVariants {
addCoreFeatureVariant "spark30scala212", spark30Version, scala212Version
addCoreFeatureVariant "spark20scala211", spark24Version, scala211Version
addCoreFeatureVariant "spark20scala210", spark22Version, scala210Version
addCoreFeatureVariant "spark13scala211", spark13Version, scala211Version
addCoreFeatureVariant "spark13scala210", spark13Version, scala210Version

all { SparkVariantPlugin.SparkVariant variant ->

@@ -62,7 +62,6 @@ abstract class CompatUtils {

static void checkSparkLibraryCompatibility(boolean throwOnIncompatible) {
// check whether the correct opensearch-hadoop is used with the correct Spark version
boolean isSpark13Level = ObjectUtils.isClassPresent("org.apache.spark.sql.DataFrame", SparkConf.class.getClassLoader());
boolean isSpark20Level = ObjectUtils.isClassPresent("org.apache.spark.sql.streaming.StreamingQuery", SparkConf.class.getClassLoader());

try {
@@ -72,12 +71,12 @@ static void checkSparkLibraryCompatibility(boolean throwOnIncompatible) {

String errorMessage = null;

if (!(isSpark13Level || isSpark20Level)) {
if (!(isSpark20Level)) {
String sparkVersion = getSparkVersionOr("1.0-1.2");
errorMessage = String.format("Incorrect classpath detected; OpenSearch Spark compiled for Spark %s but used with unsupported Spark version %s",
esSupportedSparkVersion, sparkVersion);
} else if (isSpark20Level != isEshForSpark20) { // XOR can be applied as well but != increases readability
String sparkVersion = getSparkVersionOr(isSpark13Level ? "1.3-1.6" : "2.0+");
String sparkVersion = getSparkVersionOr("2.0+");
errorMessage = String.format("Incorrect classpath detected; OpenSearch Spark compiled for Spark %s but used with Spark %s",
esSupportedSparkVersion, sparkVersion);
}
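
With the Spark 1.x classpath probe gone, compatibility detection now hinges on a single marker class, org.apache.spark.sql.streaming.StreamingQuery, which only exists from Spark 2.0 onward; its absence is therefore reported as an unsupported pre-2.0 runtime.
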
