Permalink
Browse files

Merge pull request #202 from sapei/master

Add Dockerfile for building runtime environment for schedoscope-tutorial
  • Loading branch information...
utzwestermann committed Aug 17, 2018
2 parents 75686c7 + 824d084 commit bdd41e428132223dfdcd82921d745168bed6fc89
@@ -0,0 +1 @@
src
@@ -0,0 +1,34 @@
# Runtime environment for the schedoscope tutorial, layered on the Cloudera
# QuickStart image. The build adds Oracle JDK 8u181, git 2.9.5 built from
# source, Cloudera Manager 5.14.0 packages, the Spark 2 CSD and the cm-api
# Python client; at container start /root/prepare-env.sh is executed.
FROM cloudera/quickstart:5.7.0-0-beta
LABEL maintainer="notExist <notExist@ottogroup.com>"

# Keep a copy of this recipe inside the image (plain file copy, so COPY rather
# than ADD).
COPY Dockerfile /Dockerfile

# Unpack the JDK below /usr/java and export JAVA_HOME / MAVEN_HOME / PATH for
# the Cloudera Manager server and for login shells.
RUN curl -L -b "oraclelicense=a" http://download.oracle.com/otn-pub/java/jdk/8u181-b13/96a7b8442fe848ef90c96a2fad6ed6d1/jdk-8u181-linux-x64.tar.gz | tar xvz -C /usr/java \
 && echo "export JAVA_HOME=/usr/java/jdk1.8.0_181" >> /etc/default/cloudera-scm-server \
 && echo "export JAVA_HOME=/usr/java/jdk1.8.0_181" >> /etc/bashrc \
 && echo "export MAVEN_HOME=/usr/local/apache-maven/apache-maven-3.0.4" >> /etc/bashrc \
 && echo "export PATH=\$JAVA_HOME/bin:\$MAVEN_HOME/bin:\$PATH" >> /etc/bashrc

# Replace the packaged git with git 2.9.5 built from source.
# Every step is chained with && (the original used ';', which let a failed yum
# step go unnoticed and the build carry on).
RUN yum remove -y git \
 && yum clean all \
 && yum install -y --nogpgcheck wget curl-devel expat-devel gettext-devel openssl-devel zlib-devel gcc perl-ExtUtils \
 && yum clean all \
 && yum update -y nss curl libcurl \
 && yum clean all \
 && wget --no-check-certificate -qO- https://www.kernel.org/pub/software/scm/git/git-2.9.5.tar.gz | tar xvz -C /usr/local/src \
 && cd /usr/local/src/git-2.9.5 \
 && make prefix=/usr/local/git-2.9.5 all \
 && make prefix=/usr/local/git-2.9.5 install \
 && ln -s /usr/local/git-2.9.5/bin/git /usr/bin/git

# Point the Cloudera Manager yum repository at release 5.14.0 and upgrade the
# Cloudera Manager packages from it.
RUN sed -i 's/\/5/\/5.14.0/g' /etc/yum.repos.d/cloudera-manager.repo \
 && yum --nogpgcheck -y upgrade cloudera-manager-server cloudera-manager-daemons cloudera-manager-agent \
 && yum clean all

# Install the Spark 2 custom service descriptor into Cloudera Manager's CSD
# directory, owned by cloudera-scm.
RUN wget -qO- http://archive.cloudera.com/spark2/csd/SPARK2_ON_YARN-2.2.0.cloudera2.jar > /opt/cloudera/csd/SPARK2_ON_YARN-2.2.0.cloudera2.jar \
 && chown cloudera-scm:cloudera-scm /opt/cloudera/csd/SPARK2_ON_YARN-2.2.0.cloudera2.jar \
 && chmod 644 /opt/cloudera/csd/SPARK2_ON_YARN-2.2.0.cloudera2.jar

# cm-api is the Python client imported by parcel-installer.py.
RUN yum -y install python-pip \
 && yum clean all \
 && pip install cm-api

COPY scripts/parcel-installer.py /root/parcel-installer.py
COPY scripts/prepare-env.sh /root/prepare-env.sh

# Exec form: the start-up script runs directly instead of under `/bin/sh -c`.
CMD ["/root/prepare-env.sh"]
@@ -0,0 +1,86 @@
import time
from cm_api.api_client import ApiResource
# configuration

# Host configuration applied to all hosts through Cloudera Manager (see main()).
JDK_CONFIG = dict(java_home='/usr/java/jdk1.8.0_181')

# Comma-separated parcel repository URLs, written to REMOTE_PARCEL_REPO_URLS.
PARCEL_REPO = (
    'http://archive.cloudera.com/cdh5/parcels/5.14.0/,'
    'http://archive.cloudera.com/spark2/parcels/2.2.0.cloudera2/'
)

# Parcels to install, in this order; each entry carries a name and a version.
PARCELS = [
    dict(name="CDH", version="5.14.0-1.cdh5.14.0.p0.24"),
    dict(name="SPARK2", version="2.2.0.cloudera2-1.cdh5.12.0.p0.232957"),
]
class ParcelInstaller:
    """Drives a single parcel through download, distribution and activation.

    The parcel is identified by ``name`` and ``version``; its status is read
    by repeatedly calling ``cluster.get_parcel(name, version)``.
    """

    # Seconds to wait between two status polls.
    POLL_INTERVAL_SECONDS = 5

    def __init__(self, name, version):
        self.name = name
        self.version = version

    def install(self, cluster):
        """Download, distribute and activate the parcel on ``cluster``.

        ``cluster`` must provide ``get_parcel(name, version)`` returning an
        object with ``stage``, ``state`` and the ``start_download`` /
        ``start_distribution`` / ``activate`` methods.

        Raises:
            SystemExit: with exit code 1 when one of the three commands does
                not report success (the original exited with code 0, which
                looked like success to the calling script).
            Exception: when the parcel state reports errors while waiting.
        """
        parcel = cluster.get_parcel(self.name, self.version)

        # download the parcel
        print("Starting to download parcel %s %s" % (self.name, self.version))
        self._require_success(
            parcel.start_download(),
            "Download parcel %s %s has been failed!" % (self.name, self.version))
        parcel = self._wait_for_stage(cluster, parcel, 'DOWNLOADED', "download")
        print("Parcel %s %s has been downloaded." % (self.name, self.version))

        # distribute the parcel
        print("Starting to distribute parcel %s %s" % (self.name, self.version))
        self._require_success(
            parcel.start_distribution(),
            "Distribution of parcel %s %s has been failed!" % (self.name, self.version))
        parcel = self._wait_for_stage(cluster, parcel, 'DISTRIBUTED', "distribution")
        print("Parcel %s %s has been distributed" % (self.name, self.version))

        # activate the parcel
        print("Activating parcel %s %s" % (self.name, self.version))
        self._require_success(
            parcel.activate(),
            "Parcel %s %s activation failed!" % (self.name, self.version))
        # No progress label: activation only waits for the stage change.
        self._wait_for_stage(cluster, parcel, "ACTIVATED", None)
        print("Parcel %s %s has been activated." % (self.name, self.version))

    def _require_success(self, cmd, failure_message):
        """Print ``failure_message`` and exit with status 1 unless ``cmd.success`` is True."""
        if cmd.success != True:
            print(failure_message)
            raise SystemExit(1)

    def _wait_for_stage(self, cluster, parcel, target_stage, label):
        """Poll the parcel until ``parcel.stage`` equals ``target_stage``.

        Sleeps between polls, raises ``Exception`` if the refreshed parcel
        state carries errors, and prints ``"<label> progress: NN.NN%"`` after
        each poll when ``label`` is given. Returns the last fetched parcel.
        """
        while parcel.stage != target_stage:
            time.sleep(self.POLL_INTERVAL_SECONDS)
            parcel = cluster.get_parcel(self.name, self.version)
            if parcel.state.errors:
                raise Exception(str(parcel.state.errors))
            if label is None:
                continue
            total = float(parcel.state.totalProgress)
            # A total of 0 would make the percentage a division by zero, so
            # the progress line is simply skipped for that poll.
            if total > 0:
                completed = (float(parcel.state.progress) / total) * 100
                print("%s progress: %.2f%%" % (label, round(completed, 2)))
        return parcel
def main():
    """Configure Cloudera Manager, install all parcels, then restart the cluster.

    Connects to the Cloudera Manager API on localhost:7180, sets the remote
    parcel repository URLs and the JDK host configuration, installs every
    entry of PARCELS in order and finally stops and starts the cluster.
    """
    api = ApiResource("localhost", 7180, "cloudera", "cloudera", version=19)
    quickstart = api.get_cluster("Cloudera Quickstart")

    manager = api.get_cloudera_manager()
    manager.update_config({'REMOTE_PARCEL_REPO_URLS': PARCEL_REPO})
    manager.update_all_hosts_config(JDK_CONFIG)
    # NOTE(review): presumably gives Cloudera Manager time to pick up the new
    # repository settings before parcels are requested - confirm.
    time.sleep(5)

    for spec in PARCELS:
        installer = ParcelInstaller(spec['name'], spec['version'])
        installer.install(quickstart)

    print("Restarting cluster")
    stop_command = quickstart.stop()
    stop_command.wait()
    start_command = quickstart.start()
    start_command.wait()
    print("Done restarting cluster")


if __name__ == "__main__":
    main()
@@ -0,0 +1,23 @@
#!/bin/bash
# Container start-up script: starts the QuickStart services and Cloudera
# Manager, installs the parcels, recreates the Hive metastore schema, prepares
# HDFS and the cloudera user's home, then hands over to an interactive shell.
# The steps that used to fail when their target already existed (a restarted
# container) are now idempotent.

# Directory of the Hive metastore schema scripts inside the CDH parcel.
HIVE_SCHEMA_DIR=/opt/cloudera/parcels/CDH-5.14.0-1.cdh5.14.0.p0.24/lib/hive/scripts/metastore/upgrade/mysql
SCHEDOSCOPE_DIR=/home/cloudera/schedoscope

service ntpd start
service mysqld start
service cloudera-quickstart-init start
/home/cloudera/cloudera-manager --express
python /root/parcel-installer.py

# Only reset the metastore when the schema directory exists; otherwise the
# database would be dropped and left empty because the reload cannot run.
# The schema is loaded from inside its own directory, as before.
# NOTE(review): presumably the script SOURCEs sibling files by relative path - confirm.
if cd "$HIVE_SCHEMA_DIR"; then
    mysql -u root -pcloudera -e 'DROP DATABASE IF EXISTS metastore; CREATE DATABASE metastore'
    mysql -u root -pcloudera metastore < hive-schema-1.1.0.mysql.sql
else
    echo "Hive schema directory $HIVE_SCHEMA_DIR not found; metastore left untouched" >&2
fi

# -p: do not fail when /hdp already exists.
su hdfs -c "hdfs dfs -mkdir -p /hdp"
su hdfs -c "hdfs dfs -chown -R cloudera:cloudera /hdp"

sudo -u cloudera mkdir -p /home/cloudera/.cache
sudo -u cloudera touch /home/cloudera/.cache/schedoscope_history

# Clone only once; git refuses to clone into an existing non-empty directory.
if [ ! -d "$SCHEDOSCOPE_DIR/.git" ]; then
    sudo -u cloudera git clone https://github.com/ottogroup/schedoscope.git "$SCHEDOSCOPE_DIR"
fi
cd "$SCHEDOSCOPE_DIR/" || echo "Could not enter $SCHEDOSCOPE_DIR; starting the shell in $(pwd)" >&2

# exec replaces this script with the interactive shell instead of keeping the
# script around as its parent process.
exec sudo -u cloudera bash

0 comments on commit bdd41e4

Please sign in to comment.