From 658af93c70e9084249b386c092390987930e08c5 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Nov 2014 01:52:32 -0500 Subject: [PATCH 01/12] update broken hadoop link --- create_image.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/create_image.sh b/create_image.sh index 6b42447..f75c338 100755 --- a/create_image.sh +++ b/create_image.sh @@ -65,8 +65,7 @@ source ~/.bash_profile sudo mkdir /root/hadoop-native cd /tmp sudo yum install -y protobuf-compiler cmake openssl-devel -wget "http://apache.mirrors.tds.net/hadoop/common/"\ -"hadoop-2.4.1/hadoop-2.4.1-src.tar.gz" +wget "http://archive.apache.org/dist/hadoop/common/hadoop-2.4.1/hadoop-2.4.1-src.tar.gz" tar xvzf hadoop-2.4.1-src.tar.gz cd hadoop-2.4.1-src mvn package -Pdist,native -DskipTests -Dtar From 29051d5d35c7774c0f511d8329833bc88cc798cb Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 6 Nov 2014 02:06:10 -0500 Subject: [PATCH 02/12] update Maven link --- create_image.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/create_image.sh b/create_image.sh index f75c338..4e6b44e 100755 --- a/create_image.sh +++ b/create_image.sh @@ -48,8 +48,7 @@ done # Install Maven (for Hadoop) cd /tmp -wget "http://apache.osuosl.org/maven/maven-3/3.2.3/binaries/"\ -"apache-maven-3.2.3-bin.tar.gz" +wget "http://archive.apache.org/dist/maven/maven-3/3.2.3/binaries/apache-maven-3.2.3-bin.tar.gz" tar xvzf apache-maven-3.2.3-bin.tar.gz mv apache-maven-3.2.3 /opt/ From 9a07db05ef94c114c9bbc792e78d2b62c00b2689 Mon Sep 17 00:00:00 2001 From: andrewor14 Date: Wed, 3 Dec 2014 13:40:55 -0800 Subject: [PATCH 03/12] Support Spark 1.1.1 --- spark/init.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spark/init.sh b/spark/init.sh index 4f1bb38..405dd3b 100755 --- a/spark/init.sh +++ b/spark/init.sh @@ -95,6 +95,13 @@ else wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-cdh4.tgz fi ;; + 1.1.1) + if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then + wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.1-bin-hadoop1.tgz + else + wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.1-bin-cdh4.tgz + fi + ;; *) echo "ERROR: Unknown Spark version" return From dc2a08ab982ac44212e3f1379a09f07d99e08a39 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 18 Dec 2014 15:02:58 -0800 Subject: [PATCH 04/12] Adding Spark 1.2.0 --- spark/init.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spark/init.sh b/spark/init.sh index 405dd3b..b25c495 100755 --- a/spark/init.sh +++ b/spark/init.sh @@ -102,6 +102,13 @@ else wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.1-bin-cdh4.tgz fi ;; + 1.2.0) + if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then + wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.0-bin-hadoop1.tgz + else + wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.0-bin-cdh4.tgz + fi + ;; *) echo "ERROR: Unknown Spark version" return From 8ee9e62347335880430e02e6a68176ae5ddcfdac Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Mon, 22 Dec 2014 14:23:09 -0800 Subject: [PATCH 05/12] Add realpath to fix rJava install issues --- setup-slave.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/setup-slave.sh b/setup-slave.sh index f2c262d..16e231c 100755 --- a/setup-slave.sh +++ b/setup-slave.sh @@ -107,3 +107,10 @@ echo 1 > /proc/sys/vm/overcommit_memory # Add github to known hosts to get git@github.com clone to work # TODO(shivaram): Avoid duplicate entries ? cat /root/spark-ec2/github.hostkey >> /root/.ssh/known_hosts + +# Create /usr/bin/realpath which is used by R to find Java installations +# NOTE: /usr/bin/realpath is missing in CentOS AMIs. See +# http://superuser.com/questions/771104/usr-bin-realpath-not-found-in-centos-6-5 +echo '#!/bin/bash' > /usr/bin/realpath +echo 'readlink -e "$@"' >> /usr/bin/realpath +chmod a+x /usr/bin/realpath From e308e5555cf7b4dcfc9bfdb497d2f73549396e8b Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Mon, 22 Dec 2014 15:00:07 -0800 Subject: [PATCH 06/12] Add realpath to create_image.sh --- create_image.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/create_image.sh b/create_image.sh index 4e6b44e..05a8cfd 100755 --- a/create_image.sh +++ b/create_image.sh @@ -73,3 +73,10 @@ sudo mv hadoop-dist/target/hadoop-2.4.1/lib/native/* /root/hadoop-native # Install Snappy lib (for Hadoop) yum install -y snappy ln -sf /usr/lib64/libsnappy.so.1 /root/hadoop-native/. + +# Create /usr/bin/realpath which is used by R to find Java installations +# NOTE: /usr/bin/realpath is missing in CentOS AMIs. See +# http://superuser.com/questions/771104/usr-bin-realpath-not-found-in-centos-6-5 +echo '#!/bin/bash' > /usr/bin/realpath +echo 'readlink -e "$@"' >> /usr/bin/realpath +chmod a+x /usr/bin/realpath From 8d0a903e190cbc032af31ce156f3b907fd902ee9 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 10 Nov 2014 16:53:05 -0500 Subject: [PATCH 07/12] Use pssh. Replace bash-isms with pssh to neatly parallelize cluster operations. Also, decrease questionably high sleep times. --- setup.sh | 63 +++++++++++++++----------------------------------------- 1 file changed, 17 insertions(+), 46 deletions(-) diff --git a/setup.sh b/setup.sh index e040661..406efe0 100755 --- a/setup.sh +++ b/setup.sh @@ -1,5 +1,7 @@ #!/bin/bash +yum install -y pssh + # Make sure we are in the spark-ec2 directory cd /root/spark-ec2 @@ -42,60 +44,29 @@ fi echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x -echo "Running setup-slave on master to mount filesystems, etc..." -source ./setup-slave.sh - -echo "SSH'ing to master machine(s) to approve key(s)..." -for master in $MASTERS; do - echo $master - ssh $SSH_OPTS $master echo -n & - sleep 0.3 -done -ssh $SSH_OPTS localhost echo -n & -ssh $SSH_OPTS `hostname` echo -n & -wait - -# Try to SSH to each cluster node to approve their key. Since some nodes may -# be slow in starting, we retry failed slaves up to 3 times. -TODO="$SLAVES $OTHER_MASTERS" # List of nodes to try (initially all) -TRIES="0" # Number of times we've tried so far -echo "SSH'ing to other cluster nodes to approve keys..." -while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ] ; do - NEW_TODO= - for slave in $TODO; do - echo $slave - ssh $SSH_OPTS $slave echo -n - if [ $? != 0 ] ; then - NEW_TODO="$NEW_TODO $slave" - fi - done - TRIES=$[$TRIES + 1] - if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ] ; then - sleep 15 - TODO="$NEW_TODO" - echo "Re-attempting SSH to cluster nodes to approve keys..." - else - break; - fi -done +echo "SSH-ing to all cluster nodes to approve keys..." +pssh --inline \ + --host "localhost $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do echo $node rsync -e "ssh $SSH_OPTS" -az /root/spark-ec2 $node:/root & scp $SSH_OPTS ~/.ssh/id_rsa $node:.ssh & - sleep 0.3 + sleep 0.1 done wait -# NOTE: We need to rsync spark-ec2 before we can run setup-slave.sh -# on other cluster nodes -echo "Running slave setup script on other cluster nodes..." -for node in $SLAVES $OTHER_MASTERS; do - echo $node - ssh -t -t $SSH_OPTS root@$node "spark-ec2/setup-slave.sh" & sleep 0.3 -done -wait +echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." +pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user root \ + --extra-args "-t -t $SSH_OPTS" \ + "spark-ec2/setup-slave.sh" + # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. @@ -126,6 +97,6 @@ chmod u+x /root/spark/conf/spark-env.sh for module in $MODULES; do echo "Setting up $module" source ./$module/setup.sh - sleep 1 + sleep 0.1 cd /root/spark-ec2 # guard against setup.sh changing the cwd done From ee5c0856ad1978a9d0122922338adb65a43ae6d0 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 11 Nov 2014 15:31:08 -0500 Subject: [PATCH 08/12] Approve key to local hostname. --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 406efe0..bd8fedc 100755 --- a/setup.sh +++ b/setup.sh @@ -46,7 +46,7 @@ find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x echo "SSH-ing to all cluster nodes to approve keys..." pssh --inline \ - --host "localhost $MASTERS $SLAVES" \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ --user root \ --extra-args "$SSH_OPTS" \ ":" From a6c6b851bc58e831748612293600c2596e87cd71 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Sun, 16 Nov 2014 23:02:08 -0500 Subject: [PATCH 09/12] approve keys twice --- setup.sh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/setup.sh b/setup.sh index bd8fedc..bf35bb6 100755 --- a/setup.sh +++ b/setup.sh @@ -11,6 +11,14 @@ source /root/.bash_profile # Load the cluster variables set by the deploy script source ec2-variables.sh +function approve_ssh_keys () { + pssh --inline \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" +} + # Set hostname based on EC2 private DNS name, so that it is set correctly # even if the instance is restarted with a different private DNS name PRIVATE_DNS=`wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname` @@ -45,11 +53,7 @@ echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x echo "SSH-ing to all cluster nodes to approve keys..." -pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" +approve_ssh_keys echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do @@ -67,6 +71,9 @@ pssh --inline \ --extra-args "-t -t $SSH_OPTS" \ "spark-ec2/setup-slave.sh" +echo "SSH-ing to all cluster nodes to re-approve keys..." +# We do this again because setup-slave.sh clears out .ssh/known_hosts. +approve_ssh_keys # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. From c0f60f6d34f2dc9ca67984708d3fc8f387219964 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 18 Nov 2014 12:29:40 -0500 Subject: [PATCH 10/12] time the pssh calls --- setup.sh | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/setup.sh b/setup.sh index bf35bb6..40c84e2 100755 --- a/setup.sh +++ b/setup.sh @@ -12,11 +12,14 @@ source /root/.bash_profile source ec2-variables.sh function approve_ssh_keys () { - pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" + time { + pssh --inline \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" + } + echo " ✝ approve_ssh_keys" } # Set hostname based on EC2 private DNS name, so that it is set correctly @@ -65,11 +68,14 @@ done wait echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." -pssh --inline \ - --host "$MASTERS $SLAVES" \ - --user root \ - --extra-args "-t -t $SSH_OPTS" \ - "spark-ec2/setup-slave.sh" +time { + pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user root \ + --extra-args "-t -t $SSH_OPTS" \ + "spark-ec2/setup-slave.sh" +} +echo " ✝ setup-slave" echo "SSH-ing to all cluster nodes to re-approve keys..." # We do this again because setup-slave.sh clears out .ssh/known_hosts. From 8913fe1a316afc24e3e639af31e54d3e6a8eb143 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Thu, 27 Nov 2014 17:00:28 -0500 Subject: [PATCH 11/12] Revert "time the pssh calls" This reverts commit d9333af0a21d1aff3216471823344f326ef84cfb. --- setup.sh | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/setup.sh b/setup.sh index 40c84e2..bf35bb6 100755 --- a/setup.sh +++ b/setup.sh @@ -12,14 +12,11 @@ source /root/.bash_profile source ec2-variables.sh function approve_ssh_keys () { - time { - pssh --inline \ - --host "localhost $(hostname) $MASTERS $SLAVES" \ - --user root \ - --extra-args "$SSH_OPTS" \ - ":" - } - echo " ✝ approve_ssh_keys" + pssh --inline \ + --host "localhost $(hostname) $MASTERS $SLAVES" \ + --user root \ + --extra-args "$SSH_OPTS" \ + ":" } # Set hostname based on EC2 private DNS name, so that it is set correctly @@ -68,14 +65,11 @@ done wait echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." -time { - pssh --inline \ - --host "$MASTERS $SLAVES" \ - --user root \ - --extra-args "-t -t $SSH_OPTS" \ - "spark-ec2/setup-slave.sh" -} -echo " ✝ setup-slave" +pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user root \ + --extra-args "-t -t $SSH_OPTS" \ + "spark-ec2/setup-slave.sh" echo "SSH-ing to all cluster nodes to re-approve keys..." # We do this again because setup-slave.sh clears out .ssh/known_hosts. From 658d88c83025cf34f6ff1b1f25bd447835990305 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Mon, 22 Dec 2014 22:00:03 -0500 Subject: [PATCH 12/12] test removing ssh pre-approval --- setup.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.sh b/setup.sh index bf35bb6..cdacfc0 100755 --- a/setup.sh +++ b/setup.sh @@ -52,8 +52,8 @@ fi echo "Setting executable permissions on scripts..." find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x -echo "SSH-ing to all cluster nodes to approve keys..." -approve_ssh_keys +# echo "SSH-ing to all cluster nodes to approve keys..." +# approve_ssh_keys echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." for node in $SLAVES $OTHER_MASTERS; do @@ -71,9 +71,9 @@ pssh --inline \ --extra-args "-t -t $SSH_OPTS" \ "spark-ec2/setup-slave.sh" -echo "SSH-ing to all cluster nodes to re-approve keys..." +# echo "SSH-ing to all cluster nodes to re-approve keys..." # We do this again because setup-slave.sh clears out .ssh/known_hosts. -approve_ssh_keys +# approve_ssh_keys # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts.