Skip to content

Commit

Permalink
Merge pull request #51 from porunov/master
Browse files Browse the repository at this point in the history
Add hadoop 3 and multicluster support
  • Loading branch information
sroegner committed Jul 31, 2018
2 parents a46f378 + 9d35cdb commit bcd218d
Show file tree
Hide file tree
Showing 25 changed files with 926 additions and 170 deletions.
20 changes: 20 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,26 @@ Which services end up running on a given host will again depend on the role(s) a
- hadoop_master will run the hadoop-resourcemanager service
- hadoop_slave will run the hadoop-nodemanager service

``hadoop.hdfs.uninstall``
-------------------------

Stops the hdfs services and uninstalls the hdfs service configuration. Removes hdfs data from local disks.

``hadoop.mapred.uninstall``
---------------------------

Uninstalls the mapreduce service scripts and configuration. Removes mapred data from local disks.

``hadoop.yarn.uninstall``
-------------------------

Uninstalls the yarn daemon scripts and configuration. Removes yarn data from local disks.

``hadoop.uninstall``
--------------------

Uninstalls all Hadoop services and configurations.

Formula Dependencies
====================

Expand Down
2 changes: 1 addition & 1 deletion README_HA.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Since this feature is more complex than the already distributed Hadoop architect
b) Initialize Zookeeper for namenode HA (hdfs zkfc -formatZK)
c) Start namenode service as usual (service hadoop-namenode start)
d) Start the zookeeper fencing service (service hadoop-zkfc start)
8. On the designated "second" namenode (to become the standby member):
8. On the designated "second" namenodes (to become the standby members):
a) Prepare HDFS namenode metadata (hdfs namenode -prepareStandby)
b) Start namenode service as usual (service hadoop-namenode start)
c) Start the zookeeper fencing service (service hadoop-zkfc start)
Expand Down
2 changes: 2 additions & 0 deletions hadoop/conf/hadoop-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ export HADOOP_PREFIX={{ hadoop_home }}
export HADOOP_CONF_DIR={{ hadoop_config }}
export PATH=$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin:${JAVA_HOME}/bin:$PATH

# export HADOOP_COMMON_LIB_NATIVE_DIR=/usr/lib/hadoop/lib/native/

export HADOOP_HEAPSIZE=1024

export JMX_OPTS=" -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote -Djava.rmi.server.hostname=127.0.0.1"
Expand Down
1 change: 0 additions & 1 deletion hadoop/conf/mapred/mapred-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

{%- if major == '1' %}
<property>
<name>mapred.job.tracker</name>
Expand Down
70 changes: 65 additions & 5 deletions hadoop/conf/yarn/yarn-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,91 @@
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
{%- if yarn.resourcemanager_hosts|count() > 1 %}

{%- from 'zookeeper/settings.sls' import zk with context %}

<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>{{yarn.ha_cluster_id}}</value>
</property>

<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>
{%- for i in range(yarn.resourcemanager_hosts|count()) -%}
rm{{loop.index}}{{ '' if loop.last else ',' }}
{%- endfor -%}</value>
</property>

<property>
<name>yarn.resourcemanager.zk-address</name>
<value>{{ zk.connection_string }}</value>
</property>

{% for resourcemanager_host in yarn.resourcemanager_hosts %}

<property>
<name>yarn.resourcemanager.scheduler.address.rm{{loop.index}}</name>
<value>{{ resourcemanager_host }}:{{ yarn.scheduler_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.resource-tracker.address.rm{{loop.index}}</name>
<value>{{ resourcemanager_host }}:{{ yarn.resourcetracker_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.address.rm{{loop.index}}</name>
<value>{{ resourcemanager_host }}:{{ yarn.resourcemanager_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.admin.address.rm{{loop.index}}</name>
<value>{{ resourcemanager_host }}:{{ yarn.resourcemanager_admin_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.webapp.address.rm{{loop.index}}</name>
<value>{{ resourcemanager_host }}:{{ yarn.resourcemanager_webapp_port }}</value>
</property>

{%- endfor -%}

{%- else -%}

<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>{{ yarn.resourcemanager_host }}:{{ yarn.scheduler_port }}</value>
<value>{{ yarn.resourcemanager_hosts|first() }}:{{ yarn.scheduler_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>{{ yarn.resourcemanager_host }}:{{ yarn.resourcetracker_port }}</value>
<value>{{ yarn.resourcemanager_hosts|first() }}:{{ yarn.resourcetracker_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.address</name>
<value>{{ yarn.resourcemanager_host }}:{{ yarn.resourcemanager_port }}</value>
<value>{{ yarn.resourcemanager_hosts|first() }}:{{ yarn.resourcemanager_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.admin.address</name>
<value>{{ yarn.resourcemanager_host }}:{{ yarn.resourcemanager_admin_port }}</value>
<value>{{ yarn.resourcemanager_hosts|first() }}:{{ yarn.resourcemanager_admin_port }}</value>
</property>

<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>{{ yarn.resourcemanager_host }}:{{ yarn.resourcemanager_webapp_port }}</value>
<value>{{ yarn.resourcemanager_hosts|first() }}:{{ yarn.resourcemanager_webapp_port }}</value>
</property>

{%- endif -%}

<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:{{ yarn.nodemanager_port }}</value>
Expand Down
12 changes: 10 additions & 2 deletions hadoop/files/hadoop.init
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ if [ -f /etc/default/hadoop ]; then
. /etc/default/hadoop
fi

if [ -f /etc/init.d/functions ]; then
. /etc/init.d/functions
fi

if [ -f /etc/default/mapred-{{ hadoop_user }}-{{ hadoop_svc }} ] ; then
. /etc/default/mapred-{{ hadoop_user }}-{{ hadoop_svc }}
fi
Expand All @@ -64,9 +68,9 @@ SLEEP_TIME=5
PROC_NAME="java"

{%- if hadoop_major == 1 %}
{%- set bindir='bin' %}
{%- set bindir='bin' %}
{%- else %}
{%- set bindir='sbin' %}
{%- set bindir='sbin' %}
{%- endif %}

DAEMON="hadoop-{{ hadoop_user }}-{{ hadoop_svc }}"
Expand All @@ -92,6 +96,10 @@ PIDFILE="${PIDDIR}/hadoop-{{ hadoop_user }}-{{ hadoop_svc }}.pid"
EXEC_PATH="{{ hadoop_home }}/{{ bindir }}/hadoop-daemon.sh"
{%- endif %}

{%- if hadoop_major > 2 %}
PIDFILE="${PIDDIR}/hadoop-{{ hadoop_user }}-{{ hadoop_svc }}.pid"
{%- endif %}

start() {
[ -x $EXEC_PATH ] || exit $ERROR_PROGRAM_NOT_INSTALLED
[ -d $CONF_DIR ] || exit $ERROR_PROGRAM_NOT_CONFIGURED
Expand Down
43 changes: 43 additions & 0 deletions hadoop/files/hadoop.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{#- Jinja template rendering a systemd unit for one Hadoop daemon.
    Context vars supplied by the Salt state that manages this file:
    hadoop_major, hadoop_svc (e.g. namenode/datanode/historyserver),
    hadoop_user (hdfs/yarn/mapred), hadoop_home. -#}
{#- Hadoop 1 ships its daemon scripts under bin/, later versions under sbin/. -#}
{%- if hadoop_major == 1 %}
{%- set bindir='bin' %}
{%- else %}
{%- set bindir='sbin' %}
{%- endif %}

{%- set piddir="/var/run/hadoop" %}
{%- set conf_dir="/etc/hadoop/conf" %}

{#- Select wrapper script and pidfile naming per service type:
    the mapreduce historyserver and yarn daemons use their own launcher
    scripts and pidfile prefixes; everything else uses hadoop-daemon.sh. -#}
{%- if hadoop_svc == 'historyserver' %}
{%- set pidfile=piddir+"/mapred-"+hadoop_user+"-"+hadoop_svc+".pid" %}
{%- set exec_path=hadoop_home+"/"+bindir+"/mr-jobhistory-daemon.sh" %}
{%- elif hadoop_user == 'yarn' %}
{%- set pidfile=piddir+"/yarn-"+hadoop_user+"-"+hadoop_svc+".pid" %}
{%- set exec_path=hadoop_home+"/"+bindir+"/yarn-daemon.sh" %}
{%- else %}
{%- set pidfile=piddir+"/hadoop-"+hadoop_user+"-"+hadoop_svc+".pid" %}
{%- set exec_path=hadoop_home+"/"+bindir+"/hadoop-daemon.sh" %}
{%- endif %}

{#- For Hadoop major > 2 the pidfile is always "hadoop-<user>-<svc>.pid",
    overriding the per-service choice above (exec_path is NOT overridden).
    NOTE(review): presumably matches Hadoop 3 daemon pidfile naming — the
    same override appears in hadoop/files/hadoop.init; confirm against the
    hadoop-functions.sh of the packaged version. -#}
{%- if hadoop_major > 2 %}
{%- set pidfile=piddir+"/hadoop-"+hadoop_user+"-"+hadoop_svc+".pid" %}
{%- endif %}

[Unit]
Description=Hadoop DFS {{ hadoop_svc }}
After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target
Requires=network-online.target

[Service]
User={{ hadoop_user }}
Group=hadoop
# forking: the *-daemon.sh wrapper backgrounds the JVM; systemd tracks it
# via the pidfile written by the wrapper.
Type=forking
# Have systemd create /var/run/hadoop (mode 0775) so the daemon user can
# write its pidfile there.
RuntimeDirectory=hadoop
RuntimeDirectoryMode=775
ExecStart={{ exec_path }} --config {{ conf_dir }} start {{ hadoop_svc }}
ExecStop={{ exec_path }} --config {{ conf_dir }} stop {{ hadoop_svc }}
# JVM startup (and possible HDFS metadata load) can be slow; allow 2 minutes.
TimeoutStartSec=2min
Restart=on-failure
PIDFile={{ pidfile }}

[Install]
WantedBy=multi-user.target
2 changes: 2 additions & 0 deletions hadoop/files/hadoop.sh.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ export HADOOP_COMMON_HOME={{ alt_home }}
export HADOOP_HDFS_HOME={{ alt_home }}
export HADOOP_MAPRED_HOME={{ alt_home }}
export HADOOP_YARN_HOME={{ alt_home }}
# export HADOOP_COMMON_LIB_NATIVE_DIR={{ alt_home }}/lib/native
# export HADOOP_OPTS="-Djava.library.path={{ alt_home }}/lib/native"
2 changes: 1 addition & 1 deletion hadoop/hdfs/ha_namenode.sls
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ bootstrap-secondary-namenode:
{%- if hdfs.is_primary_namenode or hdfs.is_secondary_namenode %}
hdfs-services:
hdfs-ha-services:
service.running:
- enable: True
- names:
Expand Down
76 changes: 71 additions & 5 deletions hadoop/hdfs/init.sls
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,13 @@ format-namenode:
- unless: test -d {{ test_folder }}
{%- endif %}
/etc/init.d/hadoop-namenode:
hadoop-namenode-service:
file.managed:
{%- if grains.get('systemd') %}
- name: /etc/systemd/system/hadoop-namenode.service
{% else %}
- name: /etc/init.d/hadoop-namenode
{% endif %}
- source: salt://hadoop/files/{{ hadoop.initscript }}
- user: root
- group: root
Expand All @@ -120,10 +125,25 @@ format-namenode:
hadoop_user: hdfs
hadoop_major: {{ hadoop.major_version }}
hadoop_home: {{ hadoop.alt_home }}
{%- if grains.get('systemd') %}
module.wait:
- name: service.systemctl_reload
- watch:
- file: hadoop-namenode-service
{% if hdfs.is_datanode or hdfs.is_journalnode %}
- watch_in:
- service: hdfs-services
{% endif %}
{% endif %}
{%- if hdfs.namenode_count == 1 %}
/etc/init.d/hadoop-secondarynamenode:
hadoop-secondarynamenode-service:
file.managed:
{%- if grains.get('systemd') %}
- name: /etc/systemd/system/hadoop-secondarynamenode.service
{% else %}
- name: /etc/init.d/hadoop-secondarynamenode
{% endif %}
- source: salt://hadoop/files/{{ hadoop.initscript }}
- user: root
- group: root
Expand All @@ -134,9 +154,22 @@ format-namenode:
hadoop_user: hdfs
hadoop_major: {{ hadoop.major_version }}
hadoop_home: {{ hadoop.alt_home }}
{%- if grains.get('systemd') %}
module.wait:
- name: service.systemctl_reload
- watch:
- file: hadoop-secondarynamenode-service
- watch_in:
- service: hdfs-nn-services
{% endif %}
{%- else %}
/etc/init.d/hadoop-zkfc:
hadoop-zkfc-service:
file.managed:
{%- if grains.get('systemd') %}
- name: /etc/systemd/system/hadoop-zkfc.service
{% else %}
- name: /etc/init.d/hadoop-zkfc
{% endif %}
- source: salt://hadoop/files/{{ hadoop.initscript }}
- user: root
- group: root
Expand All @@ -147,12 +180,24 @@ format-namenode:
hadoop_user: hdfs
hadoop_major: {{ hadoop.major_version }}
hadoop_home: {{ hadoop.alt_home }}
{%- if grains.get('systemd') %}
module.wait:
- name: service.systemctl_reload
- watch:
- file: hadoop-zkfc-service
{% endif %}
{% endif %}
{% endif %}
{% if hdfs.is_datanode %}
/etc/init.d/hadoop-datanode:
hadoop-datanode-service:
file.managed:
{%- if grains.get('systemd') %}
- name: /etc/systemd/system/hadoop-datanode.service
{% else %}
- name: /etc/init.d/hadoop-datanode
{% endif %}
- source: salt://hadoop/files/{{ hadoop.initscript }}
- user: root
- group: root
Expand All @@ -163,11 +208,24 @@ format-namenode:
hadoop_user: hdfs
hadoop_major: {{ hadoop.major_version }}
hadoop_home: {{ hadoop.alt_home }}
{%- if grains.get('systemd') %}
module.wait:
- name: service.systemctl_reload
- watch:
- file: hadoop-datanode-service
- watch_in:
- service: hdfs-services
{% endif %}
{% endif %}
{% if hdfs.is_journalnode %}
/etc/init.d/hadoop-journalnode:
hadoop-journalnode-service:
file.managed:
{%- if grains.get('systemd') %}
- name: /etc/systemd/system/hadoop-journalnode.service
{% else %}
- name: /etc/init.d/hadoop-journalnode
{% endif %}
- source: salt://hadoop/files/{{ hadoop.initscript }}
- user: root
- group: root
Expand All @@ -178,6 +236,14 @@ format-namenode:
hadoop_user: hdfs
hadoop_major: {{ hadoop.major_version }}
hadoop_home: {{ hadoop.alt_home }}
{%- if grains.get('systemd') %}
module.wait:
- name: service.systemctl_reload
- watch:
- file: hadoop-journalnode-service
- watch_in:
- service: hdfs-services
{% endif %}
{% endif %}
{% if hdfs.is_namenode and hdfs.namenode_count == 1 %}
Expand Down
Loading

0 comments on commit bcd218d

Please sign in to comment.