Skip to content

Commit 6860cee

Browse files
committed
upgrade
1 parent 6378226 commit 6860cee

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

spark-jupyter/Dockerfile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ RUN ssh-keygen -t rsa -P "" -f /root/.ssh/id_rsa \
4343
COPY ubuntu/root/.ssh/config /root/.ssh/config
4444

4545
# setup hadoop
46-
RUN wget -q http://apache.mirrors.tds.net/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz -O /tmp/hadoop-3.2.1.tar.gz \
47-
&& tar -xzf /tmp/hadoop-3.2.1.tar.gz -C /usr/local/ \
48-
&& ln -s /usr/local/hadoop-3.2.1 /usr/local/hadoop \
46+
RUN wget -q https://dlcdn.apache.org/hadoop/common/hadoop-3.4.1/hadoop-3.4.1.tar.gz -O /tmp/hadoop.tar.gz \
47+
&& tar -xzf /tmp/hadoop.tar.gz -C /usr/local/ \
48+
&& ln -s /usr/local/hadoop-3.4.1 /usr/local/hadoop \
4949
&& rm -fr /usr/local/hadoop/etc/hadoop/* \
5050
&& mkdir /usr/local/hadoop/extras \
5151
&& mkdir /var/hadoop \
@@ -59,16 +59,16 @@ COPY ubuntu/usr/local/hadoop/extras/* /usr/local/hadoop/extras/
5959
RUN $HADOOP_HOME/bin/hdfs namenode -format oneoffcoder
6060

6161
# setup spark
62-
RUN wget -q https://archive.apache.org/dist/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz -O /tmp/spark-2.4.4-bin-hadoop2.7.tgz \
63-
&& tar -xzf /tmp/spark-2.4.4-bin-hadoop2.7.tgz -C /usr/local/ \
64-
&& ln -s /usr/local/spark-2.4.4-bin-hadoop2.7 /usr/local/spark \
62+
RUN wget -q https://dlcdn.apache.org/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz -O /tmp/spark.tgz \
63+
&& tar -xzf /tmp/spark.tgz -C /usr/local/ \
64+
&& ln -s /usr/local/spark-3.5.5-bin-hadoop3 /usr/local/spark \
6565
&& rm /usr/local/spark/conf/*.template
6666
COPY ubuntu/usr/local/spark/conf/* /usr/local/spark/conf/
6767

6868
# setup conda
6969
COPY ubuntu/root/.jupyter /root/.jupyter/
7070
COPY ubuntu/root/ipynb/environment.yml /tmp/environment.yml
71-
RUN wget -q https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh -O /tmp/anaconda.sh \
71+
RUN wget -q https://repo.anaconda.com/archive/Anaconda3-2024.10-1-Linux-x86_64.sh -O /tmp/anaconda.sh \
7272
&& /bin/bash /tmp/anaconda.sh -b -p $CONDA_HOME \
7373
&& $CONDA_HOME/bin/conda env update -n base --file /tmp/environment.yml \
7474
&& $CONDA_HOME/bin/conda update -n root conda -y \

spark-jupyter/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
This Docker container is meant for learning PySpark programming. It has the following components.
66

7-
* Hadoop v3.2.1
8-
* Spark v2.4.4
9-
* Conda 3 with Python v3.7
7+
* Hadoop v3.4.1
8+
* Spark v3.5.5
9+
* Conda with Python v3.11
1010

1111
After running the container, you may visit the following pages.
1212

spark-jupyter/deploy.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
ORGANIZATION=oneoffcoder
44
REPOSITORY=spark-jupyter
5-
VERSION=0.1.0
5+
VERSION=0.2.0
66
IMAGEID=spark-jupyter:local
77

88
echo ${IMAGEID}

0 commit comments

Comments
 (0)