-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
29e6317
commit 420fc49
Showing
9 changed files
with
230 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Builds the Docker image from the repository root and pushes it to the
# GitHub Container Registry (ghcr.io).
name: Deploy Docker image

on:
  # Publish an image whenever a release is published.
  # Path filters (paths / paths-ignore) are only supported for push and
  # pull_request events, so none is configured for this trigger.
  release:
    types: [published]

  # Can trigger action manually
  workflow_dispatch:

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-deploy-image:
    runs-on: ubuntu-latest

    # GITHUB_TOKEN needs read access to the sources and write access to
    # packages in order to push the image.
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to the container registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Derives the image tags and labels from the triggering ref / release.
      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build & push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# Image for running Apache Hadoop 3.3.1 in pseudo-distributed (single node)
# mode. bootstrap.sh is the container's default command.

# Ubuntu as the base image
FROM ubuntu:20.04

# Work from the filesystem root: the Hadoop tarball is unpacked here, which is
# why HADOOP_HOME below is /hadoop-3.3.1
WORKDIR /

# Install required dependencies.
# wget and the CA bundle it needs for HTTPS are listed explicitly because the
# Hadoop download below depends on them. DEBIAN_FRONTEND is set inline (not via
# ENV) so package configuration cannot stall the build waiting for input and
# the setting does not leak into the runtime environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        ca-certificates \
        nano \
        openjdk-8-jdk \
        openssh-client \
        openssh-server \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Generate SSH key pair for password less login
RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa \
    && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys \
    && chmod 0600 ~/.ssh/authorized_keys

# Download Hadoop 3.3.1, unpack it and delete the archive in ONE layer so the
# tarball is not retained in the image. The Apache archive is used because it
# keeps superseded releases available.
RUN wget -nv https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz \
    && tar xzf hadoop-3.3.1.tar.gz \
    && rm hadoop-3.3.1.tar.gz

# Hadoop home
ENV HADOOP_HOME=/hadoop-3.3.1

# Other Hadoop environment variables
ENV HADOOP_INSTALL=${HADOOP_HOME} \
    HADOOP_MAPRED_HOME=${HADOOP_HOME} \
    HADOOP_COMMON_HOME=${HADOOP_HOME} \
    HADOOP_HDFS_HOME=${HADOOP_HOME} \
    YARN_HOME=${HADOOP_HOME} \
    HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_HOME}/lib/native \
    PATH=$PATH:${HADOOP_HOME}/sbin:${HADOOP_HOME}/bin \
    HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"

# Java home
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/

# For start-all.sh
ENV HDFS_NAMENODE_USER="root" \
    HDFS_DATANODE_USER="root" \
    HDFS_SECONDARYNAMENODE_USER="root" \
    YARN_RESOURCEMANAGER_USER="root" \
    YARN_NODEMANAGER_USER="root"

# Dump environment variables since connecting
# to localhost via SSH wipes them out
RUN env | grep _ >> /etc/environment

# Copy Hadoop configuration files to the "etc" directory
COPY /etc/* ${HADOOP_HOME}/etc/hadoop/

# Copy bootstrap.sh
COPY ./bootstrap.sh /

CMD [ "bash", "./bootstrap.sh" ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,41 @@ | ||
# hadoop-docker | ||
Apache Hadoop's Pseudo Distributed Mode using Docker. 🐳 | ||
# Apache Hadoop using Docker 🐳 | ||
|
||
A Docker image to play around with [Apache Hadoop](https://hadoop.apache.org) in [Pseudo Distributed Mode](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html) (single cluster mode). | ||
|
||
### Below are the steps to play around with this image using [Play with Docker](https://labs.play-with-docker.com). | ||
|
||
1. First of all, create an account on [Docker Hub](https://hub.docker.com/signup). | ||
2. Login to [Play with Docker](https://labs.play-with-docker.com) using the Docker Hub account you just created. | ||
3. You should see a green "Start" button, click on it to start a session. | ||
4. Create an instance by clicking on "+ Add new instance" in the left pane, to create a VM. | ||
5. A new terminal should show up in the right pane. Here, we need to pull the Docker image from _GitHub Container Registry (GHCR)_. | ||
To do so, execute: | ||
|
||
```bash | ||
docker pull ghcr.io/max-rocco/hadoop-docker:main | ||
``` | ||
|
||
6. After the image has been pulled into the VM, we need to start a new container & switch into its terminal (mostly bash). | ||
To do so, execute: | ||
|
||
```bash | ||
docker run -it ghcr.io/max-rocco/hadoop-docker:main | ||
``` | ||
|
||
_At this stage, the image will be booting up by executing all the required steps to start Hadoop._ | ||
|
||
**From now on, you will be inside the container's bash (terminal) and can start using Hadoop's filesystem commands.** 🚀 | ||
|
||
<hr /> | ||
|
||
[![Deploy Docker image](https://github.com/max-rocco/hadoop-docker/actions/workflows/build.yml/badge.svg)](https://github.com/max-rocco/hadoop-docker/actions/workflows/build.yml) | ||
|
||
<hr /> | ||
|
||
<div align="center"> | ||
|
||
```txt | ||
D. Kasi Pavan Kumar (c) 2021 | ||
``` | ||
|
||
</div> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash
#
# Container start-up script: starts the SSH server, opens an SSH session to
# localhost, formats the HDFS namenode, starts all Hadoop services and finally
# hands the user an interactive shell.

# 1. Start the SSH server
/etc/init.d/ssh start

# 2. Connect to "localhost" via SSH
#    https://askubuntu.com/a/123080
#    The heredoc delimiter is quoted so that $PATH and $HADOOP_HOME are
#    expanded by the remote shell, i.e. after /etc/environment has been
#    sourced there, instead of by this script before the text is sent.
#    -T: the commands arrive on stdin, so no pseudo-terminal is requested.
ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -T localhost << 'EOF'
# 3. Source the environment variables dumped in /etc/environment
source /etc/environment
# 4. Set Hadoop's "sbin" & "bin" in path
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
EOF

# 5. Format "Namenode"
hdfs namenode -format

# 6. Start all Hadoop services
"$HADOOP_HOME/sbin/start-all.sh"

# 7. Leave user with the shell
#    exec replaces this script with the shell instead of keeping the script
#    around as its parent process.
exec /bin/bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<!-- Hadoop core settings: temporary directory and default filesystem. -->
<configuration>
  <!-- Base directory for Hadoop's temporary data -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/tmpdata</value>
  </property>
  <!-- Default filesystem URI. fs.defaultFS is the current name of the
       deprecated fs.default.name key. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://127.0.0.1:9000</value>
  </property>
</configuration>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<!-- HDFS settings: storage directories and replication factor. -->
<configuration>
  <!-- Namenode storage directory. This entry previously reused the datanode
       key, so the namenode path was never actually configured. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/dfsdata/namenode</value>
  </property>
  <!-- Datanode storage directory (dfs.datanode.data.dir is the current name
       of the deprecated dfs.data.dir key) -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/dfsdata/datanode</value>
  </property>
  <!-- Keep a single copy of each block -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<!-- MapReduce settings. -->
<configuration>
  <!-- Framework used to execute MapReduce jobs -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<!-- YARN settings: shuffle auxiliary service, resource manager host, ACLs
     and the environment variables passed on to containers. -->
<configuration>
  <!-- Auxiliary service run by the node manager -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Implementation class of the service declared above; the key must embed
       the exact service name (mapreduce_shuffle) -->
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>127.0.0.1</value>
  </property>
  <!-- ACLs disabled; written as a boolean literal rather than 0 -->
  <property>
    <name>yarn.acl.enable</name>
    <value>false</value>
  </property>
  <!-- Environment variables containers may inherit from the node manager -->
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>