SDC-6717: Create stage libraries for individual MapR MEP versions

This patch breaks the MapR 6.0 stage library into two separate libraries:

* The main MapR 6.0.0 stage library, which contains all MapR native
  components (FS, DB, Streams, ...)
* A MEP 4 (ecosystem) package, which contains only the stages that
  depend on ecosystem components (Hive, streaming pipeline execution)

It also updates the enablement script (setup-mapr) to make working with
the split libraries a bit easier.
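
As a rough usage sketch of the updated enablement flow (the script location and
launcher are assumptions; the environment variable names come from the script
changes below), enabling MapR 6.0 together with its MEP 4 ecosystem library now
means exporting both versions before running setup-mapr:

  # Illustrative only -- paths and launcher may differ per installation
  export SDC_HOME=/opt/streamsets-datacollector   # hypothetical install location
  export MAPR_HOME=/opt/mapr                      # hypothetical MapR install location
  export MAPR_VERSION=6.0.0
  export MAPR_MEP_VERSION=4
  bash setup-mapr

If MAPR_MEP_VERSION is left unset, only the base streamsets-datacollector-mapr_6_0-lib
is enabled; when it is set, the matching mep${MAPR_MEP_VERSION} stage library is
enabled and symlinked as well.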

Change-Id: Idf09381723768c187e857e44e97fbb1abf051d84
Reviewed-on: https://review.streamsets.net/11115
Tested-by: StreamSets CI <streamsets-ci-spam@streamsets.com>
Reviewed-by: Jeff Evans <jeff@streamsets.com>
jarcec committed Nov 1, 2017
1 parent 6cfae52 commit 9452a03489ddf8ae2af81be9afaa904c7e766a55
@@ -336,6 +336,10 @@
<directory>${basedir}/../${mapr_6_0-lib}/target/streamsets-libs/</directory>
<outputDirectory>/streamsets-libs/</outputDirectory>
</fileSet>
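<!-- MEP 4 ecosystem stage library for MapR 6.0 (new in this patch) -->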
<fileSet>
<directory>${basedir}/../${mapr_6_0-mep4-lib}/target/streamsets-libs/</directory>
<outputDirectory>/streamsets-libs/</outputDirectory>
</fileSet>

<!-- Apache Kudu libs -->
<fileSet>
@@ -362,7 +362,7 @@ stage.alias.streamsets-datacollector-cdh_5_10-lib,com_streamsets_pipeline_stage_
# setup-mapr script will not work properly.
#
#system.stagelibs.whitelist=
system.stagelibs.blacklist=streamsets-datacollector-mapr_5_0-lib,streamsets-datacollector-mapr_5_1-lib,streamsets-datacollector-mapr_5_2-lib,streamsets-datacollector-mapr_6_0-lib,streamsets-datacollector-mapr_spark_2_1_mep_3_0-lib
system.stagelibs.blacklist=streamsets-datacollector-mapr_5_0-lib,streamsets-datacollector-mapr_5_1-lib,streamsets-datacollector-mapr_5_2-lib,streamsets-datacollector-mapr_6_0-lib,streamsets-datacollector-mapr_6_0-mep4-lib,streamsets-datacollector-mapr_spark_2_1_mep_3_0-lib
#
#user.stagelibs.whitelist=
#user.stagelibs.blacklist=
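#
# Illustrative example (not part of the shipped file): after running setup-mapr
# for MapR 6.0 with MAPR_MEP_VERSION=4, the script removes both
# streamsets-datacollector-mapr_6_0-lib and streamsets-datacollector-mapr_6_0-mep4-lib
# from this blacklist so that both stage libraries are loaded on startup.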
@@ -1,4 +1,4 @@
#!/bin/bash
#!/bin/bash -x
#
# Copyright 2017 StreamSets Inc.
#
@@ -30,7 +30,7 @@
BLACKLIST_PROP="system.stagelibs.blacklist"
PROP_FILENAME="sdc.properties"
POLICY_FILENAME="sdc-security.policy"
MAPR_LIB="streamsets-datacollector-mapr"
MAPR_LIB_BASE="streamsets-datacollector-mapr"
MAPR_SPARK_LIB="streamsets-datacollector-mapr_spark*-lib"

if [ -z ${SDC_HOME} ];then
@@ -77,9 +77,9 @@ fi
# Resolve MapR Version
if [[ -z "${MAPR_VERSION}" ]];
then
read -p "Please enter the MapR version (default 5.2.0): " MAPR_VERSION
read -p "Please enter the MapR version (default 6.0.0): " MAPR_VERSION
fi
MAPR_VERSION=${MAPR_VERSION:=5.2.0}
MAPR_VERSION=${MAPR_VERSION:=6.0.0}
# Check if input version is valid
if ! [[ ${MAPR_VERSION} =~ ^[1-9][0-9]*\.[0-9]*(\.[0-9]*)*$ ]];
then
@@ -88,12 +88,22 @@ then
fi
MAPR_VERSION=`expr "$MAPR_VERSION" : '\([1-9][0-9]*\.[0-9]*\)'`
_MAPR_VERSION=${MAPR_VERSION/./_}
MAPR_LIB=${MAPR_LIB}_${_MAPR_VERSION}-lib
MAPR_LIB=${MAPR_LIB_BASE}_${_MAPR_VERSION}-lib
# Check if this Data Collector support the MapR version
if [ ! -d "$SDC_HOME/streamsets-libs/$MAPR_LIB" ]; then
echo "Error: StreamSets Data Collector does not support MapR version $MAPR_VERSION"
exit 0
fi

# MEP is stored in separate stage library (for MapR 6 and above)
if [ ! -z $MAPR_MEP_VERSION ]; then
MAPR_MEP_LIB=${MAPR_LIB_BASE}_${_MAPR_VERSION}-mep${MAPR_MEP_VERSION}-lib
if [ ! -d "$SDC_HOME/streamsets-libs/$MAPR_MEP_LIB" ]; then
echo "Error: StreamSets Data Collector does not support MapR version $MAPR_VERSION MEP $MAPR_MEP_VERSION (stage library $MAPR_MEP_LIB)"
exit 0
fi
fi
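# Illustrative expansion (not part of the original script): with MAPR_VERSION=6.0.0 and
# MAPR_MEP_VERSION=4, MAPR_MEP_LIB resolves to streamsets-datacollector-mapr_6_0-mep4-lib,
# the new ecosystem stage library introduced by this patch.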

# Obtain the version of MapR Spark Data Collector supports
SDC_MAPR_SPARK_LIB=`ls $SDC_HOME/streamsets-libs | grep streamsets-datacollector-mapr_spark_*`
SDC_SPARK_VERSION=`echo $SDC_MAPR_SPARK_LIB | cut -d'_' -f 3,4 | sed 's/_/./g'`
@@ -123,57 +133,80 @@ else
fi
fi

# Remove given library from the list of blacklisted properties
function blacklist_remove {
# Library to be removed from the blacklist
STAGE_LIB=$1
echo "Enabling $STAGE_LIB"

original_property=$(grep -i "$BLACKLIST_PROP" "${SDC_PROP_FILE}")
blacklist_property=${original_property/${STAGE_LIB},/}
if [ ! -z $ENABLE_MAPR_SPARK ];then
echo "Enabling MapR Spark ..."
blacklist_property=${blacklist_property/${SDC_MAPR_SPARK_LIB}/}
fi
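# Rewrite the blacklist property in place in sdc.properties; the ${BACKUP_EXT} argument to
# sed -i is meant to preserve a backup copy of the original file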
sed -i ${BACKUP_EXT} "s/${original_property}/${blacklist_property}/" "${SDC_PROP_FILE}"
}

# Create MapR specific symlinks for given stage library
function create_symlinks {
STAGE_LIB=$1
echo "Working on stage library $STAGE_LIB"

# Create symbolic links. Ignore the stderr (there will be a lot of symlinks already exist errors)
ln -s ${MAPR_HOME}/lib/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hadoop/hadoop-*/share/hadoop/common/lib/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hadoop/hadoop-*/share/hadoop/mapreduce/lib/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hadoop/hadoop-*/share/hadoop/mapreduce/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/lib/maprfs-${MAPR_VERSION}*.jar ${SDC_HOME}/root-lib/ 2>/dev/null
if [ -d ${MAPR_HOME}/hive ];then
ln -s ${MAPR_HOME}/hive/hive-*/lib/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hive/hive-*/hcatalog/share/hcatalog/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
fi
if [ -d ${MAPR_HOME}/hbase ];then
ln -s ${MAPR_HOME}/hbase/hbase-*/lib/*.jar ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/ 2>/dev/null
fi

# SDC-6449: Remove the symlink to metrics-core jar if MapR Hive ships same version as SDC.
SDC_METRICS_CORE_VERSION=`ls ${SDC_HOME}/api-lib | grep "metrics-core" | cut -d'-' -f3 | cut -d'.' -f1`
MAPR_METRICS_CORE_JAR=`ls ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib | grep "metrics-core-${SDC_METRICS_CORE_VERSION}\.[0-9]*\.[0-9]*\.jar"`
if [ ! -z ${MAPR_METRICS_CORE_JAR} ];then
printf "Removing symlink to ${MAPR_METRICS_CORE_JAR} ..."
rm ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/${MAPR_METRICS_CORE_JAR}
printf "Done.\n"
fi

echo "Removing old Hadoop 0.20 jars ...."
rm -rf ${SDC_HOME}/streamsets-libs/${STAGE_LIB}/lib/hadoop-0.20*.jar
}

echo ""
echo "Configuration:"
echo "SDC_HOME=$SDC_HOME"
echo "SDC_CONF=$SDC_CONF"
echo "MAPR_HOME=$MAPR_HOME"
echo "MAPR_VERSION=$MAPR_VERSION"
echo "MAPR_MEP_VERSION=$MAPR_MEP_VERSION"
if [ ! -z $ENABLE_MAPR_SPARK ];then
echo "MAPR_SPARK_VERSION=$MAPR_SPARK_VERSION"
fi
echo ""
echo "Working:"

# Remove MapR Version and MapR Spark from sdc.properties file
printf "Updating sdc.properties file ...."
original_property=$(grep -i "$BLACKLIST_PROP" "${SDC_PROP_FILE}")
blacklist_property=${original_property/${MAPR_LIB},/}
if [ ! -z $ENABLE_MAPR_SPARK ];then
echo "Enabling MapR Spark ..."
blacklist_property=${blacklist_property/${SDC_MAPR_SPARK_LIB}/}
fi
sed -i ${BACKUP_EXT} "s/${original_property}/${blacklist_property}/" "${SDC_PROP_FILE}"
printf "Done.\n"

# Create symbolic links. Ignore the stderr (there will be a lot of symlinks already exist errors)
printf "Creating symbolic links ...."
ln -s ${MAPR_HOME}/lib/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hadoop/hadoop-*/share/hadoop/common/lib/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hadoop/hadoop-*/share/hadoop/mapreduce/lib/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hadoop/hadoop-*/share/hadoop/mapreduce/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/lib/maprfs-${MAPR_VERSION}*.jar ${SDC_HOME}/root-lib/ 2>/dev/null
if [ -d ${MAPR_HOME}/hive ];then
ln -s ${MAPR_HOME}/hive/hive-*/lib/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
ln -s ${MAPR_HOME}/hive/hive-*/hcatalog/share/hcatalog/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
fi
if [ -d ${MAPR_HOME}/hbase ];then
ln -s ${MAPR_HOME}/hbase/hbase-*/lib/*.jar ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/ 2>/dev/null
fi
printf "Done.\n"

# SDC-6449: Remove the symlink to metrics-core jar if MapR Hive ships same version as SDC.
SDC_METRICS_CORE_VERSION=`ls ${SDC_HOME}/api-lib | grep "metrics-core" | cut -d'-' -f3 | cut -d'.' -f1`
MAPR_METRICS_CORE_JAR=`ls ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib | grep "metrics-core-${SDC_METRICS_CORE_VERSION}\.[0-9]*\.[0-9]*\.jar"`
if [ ! -z ${MAPR_METRICS_CORE_JAR} ];then
printf "Removing symlink to ${MAPR_METRICS_CORE_JAR} ..."
rm ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/${MAPR_METRICS_CORE_JAR}
printf "Done.\n"
fi

printf "Removing old Hadoop 0.20 jars ...."
rm -rf ${SDC_HOME}/streamsets-libs/${MAPR_LIB}/lib/hadoop-0.20*.jar
printf "Done.\n"
echo "Updating sdc.properties file ...."
blacklist_remove $MAPR_LIB
if [ ! -z $MAPR_MEP_LIB ]; then
blacklist_remove $MAPR_MEP_LIB
fi
echo "Done."
echo ""

echo "Symlinking MapR Jar files"
create_symlinks $MAPR_LIB
if [ ! -z $MAPR_MEP_LIB ]; then
create_symlinks $MAPR_MEP_LIB
fi
echo "Done."
echo ""

# Add permission to sdc-security.policy file
printf "Updating sdc-security.policy file ..."
@@ -181,27 +181,6 @@
<version>${jackson.version}</version>
</dependency>

<!-- Hive related -->
<dependency>
<groupId>org.apache.hive.hcatalog</groupId>
<artifactId>hive-hcatalog-streaming</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hive.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
</dependency>

<!-- Protolibs -->

<dependency>
@@ -241,26 +220,6 @@
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-maprstreams-source-protolib</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-maprstreams-target-protolib</artifactId>
@@ -269,51 +228,7 @@
</dependency>
<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-hive-protolib</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive.hcatalog</groupId>
<artifactId>hive-hcatalog-streaming</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive.hcatalog</groupId>
<artifactId>hive-hcatalog-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-sdc-kafka-09-mapr51</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</exclusion>
<exclusion>
<groupId>com.101tec</groupId>
<artifactId>zkclient</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- include the LoginUgiProvider implementation for MapR with workaround -->
<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-mapr-common</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-mapreduce-protolib</artifactId>
<artifactId>streamsets-datacollector-maprstreams-multisource-protolib</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
@@ -330,80 +245,14 @@
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-spark-api</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<artifactId>jackson-mapper-asl</artifactId>
<groupId>org.codehaus.jackson</groupId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependency>

<!-- include the LoginUgiProvider implementation for MapR with workaround -->
<dependency>
<groupId>com.streamsets</groupId>
<artifactId>streamsets-datacollector-spark-processor-protolib</artifactId>
<artifactId>streamsets-datacollector-mapr-common</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<artifactId>jackson-mapper-asl</artifactId>
<groupId>org.codehaus.jackson</groupId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<artifactId>jackson-mapper-asl</artifactId>
<groupId>org.codehaus.jackson</groupId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>

</dependencies>