From e2320d9fb830e91edf80f9226cdd60ccbe2e63b9 Mon Sep 17 00:00:00 2001
From: Nicolas Cherel <nicolas.cherel@telecom-paristech.fr>
Date: Mon, 3 Jul 2017 17:53:15 +0200
Subject: [PATCH 1/6] Add config_path to Services

---
 flintrock/services.py | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/flintrock/services.py b/flintrock/services.py
index 8515ed11..7fa76b4a 100644
--- a/flintrock/services.py
+++ b/flintrock/services.py
@@ -107,9 +107,10 @@ def health_check(
 
 
 class HDFS(FlintrockService):
-    def __init__(self, *, version, download_source):
+    def __init__(self, *, version, download_source, config_path):
         self.version = version
         self.download_source = download_source
+        self.config_path = config_path
         self.manifest = {'version': version, 'download_source': download_source}
 
     def install(
@@ -149,13 +150,18 @@ def configure(
             cluster: FlintrockCluster):
         # TODO: os.walk() through these files.
         template_paths = [
-            'hadoop/conf/masters',
-            'hadoop/conf/slaves',
-            'hadoop/conf/hadoop-env.sh',
-            'hadoop/conf/core-site.xml',
-            'hadoop/conf/hdfs-site.xml',
+            'masters',
+            'slaves',
+            'hadoop-env.sh',
+            'core-site.xml',
+            'hdfs-site.xml',
         ]
 
+        # TODO: throw error if config folder / file can't be found
+        config_dir = self.config_path
+        if config_dir is None:
+            config_dir = os.path.join(THIS_DIR, "templates/hadoop/conf")
+
         for template_path in template_paths:
             ssh_check_output(
                 client=ssh_client,
@@ -164,7 +170,7 @@ def configure(
                 """.format(
                     f=shlex.quote(
                         get_formatted_template(
-                            path=os.path.join(THIS_DIR, "templates", template_path),
+                            path=os.path.join(config_dir, template_path),
                             mapping=generate_template_mapping(
                                 cluster=cluster,
                                 hadoop_version=self.version,
@@ -217,6 +223,7 @@ def __init__(
         version: str=None,
         hadoop_version: str,
         download_source: str=None,
+        config_path: str=None,
         git_commit: str=None,
         git_repository: str=None
     ):
@@ -230,6 +237,7 @@ def __init__(
         self.version = version
         self.hadoop_version = hadoop_version
         self.download_source = download_source
+        self.config_path = config_path
         self.git_commit = git_commit
         self.git_repository = git_repository
 
@@ -310,10 +318,15 @@ def configure(
             cluster: FlintrockCluster):
 
         template_paths = [
-            'spark/conf/spark-env.sh',
-            'spark/conf/slaves',
-            'spark/conf/spark-defaults.conf',
+            'spark-env.sh',
+            'slaves',
+            'spark-defaults.conf',
         ]
+
+        config_dir = self.config_path
+        if config_dir is None:
+            config_dir = os.path.join(THIS_DIR, "templates/spark/conf")
+
         for template_path in template_paths:
             ssh_check_output(
                 client=ssh_client,
@@ -322,7 +335,7 @@ def configure(
                 """.format(
                     f=shlex.quote(
                         get_formatted_template(
-                            path=os.path.join(THIS_DIR, "templates", template_path),
+                            path=os.path.join(config_dir, template_path),
                             mapping=generate_template_mapping(
                                 cluster=cluster,
                                 spark_executor_instances=self.spark_executor_instances,

From e7badc5a481b40540367c1681bbbd0041518e752 Mon Sep 17 00:00:00 2001
From: Nicolas Cherel <nicolas.cherel@telecom-paristech.fr>
Date: Mon, 3 Jul 2017 19:30:53 +0200
Subject: [PATCH 2/6] Added {service}_config_path to click

---
 flintrock/flintrock.py | 10 +++++++++-
 flintrock/services.py  |  4 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/flintrock/flintrock.py b/flintrock/flintrock.py
index 04948499..41286712 100644
--- a/flintrock/flintrock.py
+++ b/flintrock/flintrock.py
@@ -218,6 +218,7 @@ def cli(cli_context, config, provider, debug):
               help="URL to download Hadoop from.",
               default='http://www.apache.org/dyn/closer.lua/hadoop/common/hadoop-{v}/hadoop-{v}.tar.gz?as_json',
               show_default=True)
+@click.option('--hdfs-config-path')
 @click.option('--install-spark/--no-install-spark', default=True)
 @click.option('--spark-executor-instances', default=1,
               help="How many executor instances per worker.")
@@ -239,6 +240,7 @@ def cli(cli_context, config, provider, debug):
               help="Git repository to clone Spark from.",
               default='https://github.com/apache/spark',
               show_default=True)
+@click.option('--spark-config-path')
 @click.option('--assume-yes/--no-assume-yes', default=False)
 @click.option('--ec2-key-name')
 @click.option('--ec2-identity-file',
@@ -281,12 +283,14 @@ def launch(
         install_hdfs,
         hdfs_version,
         hdfs_download_source,
+        hdfs_config_path,
         install_spark,
         spark_executor_instances,
         spark_version,
         spark_git_commit,
         spark_git_repository,
         spark_download_source,
+        spark_config_path,
         assume_yes,
         ec2_key_name,
         ec2_identity_file,
@@ -351,7 +355,9 @@ def launch(
     check_external_dependency('ssh-keygen')
 
     if install_hdfs:
-        hdfs = HDFS(version=hdfs_version, download_source=hdfs_download_source)
+        hdfs = HDFS(version=hdfs_version,
+                    download_source=hdfs_download_source,
+                    config_path=hdfs_config_path)
         services += [hdfs]
     if install_spark:
         if spark_version:
@@ -360,6 +366,7 @@ def launch(
                 version=spark_version,
                 hadoop_version=hdfs_version,
                 download_source=spark_download_source,
+                config_path=spark_config_path
             )
         elif spark_git_commit:
             logger.warning(
@@ -373,6 +380,7 @@ def launch(
                 git_commit=spark_git_commit,
                 git_repository=spark_git_repository,
                 hadoop_version=hdfs_version,
+                config_path=spark_config_path
             )
         services += [spark]
 
diff --git a/flintrock/services.py b/flintrock/services.py
index 7fa76b4a..bda14610 100644
--- a/flintrock/services.py
+++ b/flintrock/services.py
@@ -179,7 +179,7 @@ def configure(
                                 spark_version='',
                                 spark_executor_instances=0,
                             ))),
-                    p=shlex.quote(template_path)))
+                    p=shlex.quote(os.path.join("hadoop/conf/",template_path))))
 
     # TODO: Convert this into start_master() and split master- or slave-specific
     #       stuff out of configure() into configure_master() and configure_slave().
@@ -342,7 +342,7 @@ def configure(
                                 hadoop_version=self.hadoop_version,
                                 spark_version=self.version or self.git_commit,
                             ))),
-                    p=shlex.quote(template_path)))
+                    p=shlex.quote(os.path.join("spark/conf/", template_path))))
 
     # TODO: Convert this into start_master() and split master- or slave-specific
     #       stuff out of configure() into configure_master() and configure_slave().

From cc0757c4558d08ed19e6d8fd6c1346bdae6de20a Mon Sep 17 00:00:00 2001
From: Nicolas Cherel <nicolas.cherel@telecom-paristech.fr>
Date: Mon, 3 Jul 2017 19:47:12 +0200
Subject: [PATCH 3/6] Rename config_path to template_dir, add field to YAML
 template

---
 flintrock/config.yaml.template | 10 ++++++++++
 flintrock/flintrock.py         | 14 +++++++-------
 flintrock/services.py          | 24 ++++++++++++------------
 3 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/flintrock/config.yaml.template b/flintrock/config.yaml.template
index 460552a0..2d45cb9e 100644
--- a/flintrock/config.yaml.template
+++ b/flintrock/config.yaml.template
@@ -9,12 +9,22 @@ services:
     #   - must be a tar.gz file
     # download-source: "https://www.example.com/files/spark/{v}/spark-{v}.tar.gz"
     # executor-instances: 1
+    # template-dir: # folder containing spark configuration files:
+    #   - slaves
+    #   - spark-env.sh
+    #   - spark-defaults.conf
   hdfs:
     version: 2.7.3
     # optional; defaults to download from a dynamically selected Apache mirror
     #   - must contain a {v} template corresponding to the version
     #   - must be a .tar.gz file
     # download-source: "https://www.example.com/files/hadoop/{v}/hadoop-{v}.tar.gz"
+    # template-dir: # path to the folder containing the hadoop configuration files
+    #   - masters
+    #   - slaves
+    #   - hadoop-env.sh
+    #   - hdfs-site.xml
+    #   - core-site.xml
 
 provider: ec2
 
diff --git a/flintrock/flintrock.py b/flintrock/flintrock.py
index 41286712..c538f07e 100644
--- a/flintrock/flintrock.py
+++ b/flintrock/flintrock.py
@@ -218,7 +218,7 @@ def cli(cli_context, config, provider, debug):
               help="URL to download Hadoop from.",
               default='http://www.apache.org/dyn/closer.lua/hadoop/common/hadoop-{v}/hadoop-{v}.tar.gz?as_json',
               show_default=True)
-@click.option('--hdfs-config-path')
+@click.option('--hdfs-template-dir')
 @click.option('--install-spark/--no-install-spark', default=True)
 @click.option('--spark-executor-instances', default=1,
               help="How many executor instances per worker.")
@@ -240,7 +240,7 @@ def cli(cli_context, config, provider, debug):
               help="Git repository to clone Spark from.",
               default='https://github.com/apache/spark',
               show_default=True)
-@click.option('--spark-config-path')
+@click.option('--spark-template-dir')
 @click.option('--assume-yes/--no-assume-yes', default=False)
 @click.option('--ec2-key-name')
 @click.option('--ec2-identity-file',
@@ -283,14 +283,14 @@ def launch(
         install_hdfs,
         hdfs_version,
         hdfs_download_source,
-        hdfs_config_path,
+        hdfs_template_dir,
         install_spark,
         spark_executor_instances,
         spark_version,
         spark_git_commit,
         spark_git_repository,
         spark_download_source,
-        spark_config_path,
+        spark_template_dir,
         assume_yes,
         ec2_key_name,
         ec2_identity_file,
@@ -357,7 +357,7 @@ def launch(
     if install_hdfs:
         hdfs = HDFS(version=hdfs_version,
                     download_source=hdfs_download_source,
-                    config_path=hdfs_config_path)
+                    template_dir=hdfs_template_dir)
         services += [hdfs]
     if install_spark:
         if spark_version:
@@ -366,7 +366,7 @@ def launch(
                 version=spark_version,
                 hadoop_version=hdfs_version,
                 download_source=spark_download_source,
-                config_path=spark_config_path
+                template_dir=spark_template_dir
             )
         elif spark_git_commit:
             logger.warning(
@@ -380,7 +380,7 @@ def launch(
                 git_commit=spark_git_commit,
                 git_repository=spark_git_repository,
                 hadoop_version=hdfs_version,
-                config_path=spark_config_path
+                template_dir=spark_template_dir
             )
         services += [spark]
 
diff --git a/flintrock/services.py b/flintrock/services.py
index bda14610..4a9fa51a 100644
--- a/flintrock/services.py
+++ b/flintrock/services.py
@@ -107,10 +107,10 @@ def health_check(
 
 
 class HDFS(FlintrockService):
-    def __init__(self, *, version, download_source, config_path):
+    def __init__(self, *, version, download_source, template_dir):
         self.version = version
         self.download_source = download_source
-        self.config_path = config_path
+        self.template_dir = template_dir
         self.manifest = {'version': version, 'download_source': download_source}
 
     def install(
@@ -158,9 +158,9 @@ def configure(
         ]
 
         # TODO: throw error if config folder / file can't be found
-        config_dir = self.config_path
-        if config_dir is None:
-            config_dir = os.path.join(THIS_DIR, "templates/hadoop/conf")
+        template_dir = self.template_dir
+        if template_dir is None:
+            template_dir = os.path.join(THIS_DIR, "templates/hadoop/conf")
 
         for template_path in template_paths:
             ssh_check_output(
@@ -170,7 +170,7 @@ def configure(
                 """.format(
                     f=shlex.quote(
                         get_formatted_template(
-                            path=os.path.join(config_dir, template_path),
+                            path=os.path.join(template_dir, template_path),
                             mapping=generate_template_mapping(
                                 cluster=cluster,
                                 hadoop_version=self.version,
@@ -223,7 +223,7 @@ def __init__(
         version: str=None,
         hadoop_version: str,
         download_source: str=None,
-        config_path: str=None,
+        template_dir: str=None,
         git_commit: str=None,
         git_repository: str=None
     ):
@@ -237,7 +237,7 @@ def __init__(
         self.version = version
         self.hadoop_version = hadoop_version
         self.download_source = download_source
-        self.config_path = config_path
+        self.template_dir = template_dir
         self.git_commit = git_commit
         self.git_repository = git_repository
 
@@ -323,9 +323,9 @@ def configure(
             'spark-defaults.conf',
         ]
 
-        config_dir = self.config_path
-        if config_dir is None:
-            config_dir = os.path.join(THIS_DIR, "templates/spark/conf")
+        template_dir = self.template_dir
+        if template_dir is None:
+            template_dir = os.path.join(THIS_DIR, "templates/spark/conf")
 
         for template_path in template_paths:
             ssh_check_output(
@@ -335,7 +335,7 @@ def configure(
                 """.format(
                     f=shlex.quote(
                         get_formatted_template(
-                            path=os.path.join(config_dir, template_path),
+                            path=os.path.join(template_dir, template_path),
                             mapping=generate_template_mapping(
                                 cluster=cluster,
                                 spark_executor_instances=self.spark_executor_instances,

From 2431d035bd050683ec59f263fff99709dc5f9ae8 Mon Sep 17 00:00:00 2001
From: Nicolas Cherel <nicolas.cherel@telecom-paristech.fr>
Date: Mon, 3 Jul 2017 20:22:01 +0200
Subject: [PATCH 4/6] Formatting

---
 flintrock/flintrock.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/flintrock/flintrock.py b/flintrock/flintrock.py
index c538f07e..14d855b6 100644
--- a/flintrock/flintrock.py
+++ b/flintrock/flintrock.py
@@ -355,9 +355,11 @@ def launch(
     check_external_dependency('ssh-keygen')
 
     if install_hdfs:
-        hdfs = HDFS(version=hdfs_version,
-                    download_source=hdfs_download_source,
-                    template_dir=hdfs_template_dir)
+        hdfs = HDFS(
+            version=hdfs_version,
+            download_source=hdfs_download_source,
+            template_dir=hdfs_template_dir
+        )
         services += [hdfs]
     if install_spark:
         if spark_version:

From 29e33fd696c33d139b5d977c2b66a34684f48c00 Mon Sep 17 00:00:00 2001
From: Nicolas Cherel <nicolas.cherel@telecom-paristech.fr>
Date: Mon, 3 Jul 2017 21:11:12 +0200
Subject: [PATCH 5/6] Flake8 compliance

---
 flintrock/services.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flintrock/services.py b/flintrock/services.py
index 4a9fa51a..00717e6d 100644
--- a/flintrock/services.py
+++ b/flintrock/services.py
@@ -179,7 +179,7 @@ def configure(
                                 spark_version='',
                                 spark_executor_instances=0,
                             ))),
-                    p=shlex.quote(os.path.join("hadoop/conf/",template_path))))
+                    p=shlex.quote(os.path.join("hadoop/conf/", template_path))))
 
     # TODO: Convert this into start_master() and split master- or slave-specific
     #       stuff out of configure() into configure_master() and configure_slave().

From 4df4ed0387e1b4fe9f0d7e1160b3a547aa885679 Mon Sep 17 00:00:00 2001
From: Nicolas Cherel <nicolas.cherel@telecom-paristech.fr>
Date: Mon, 3 Jul 2017 22:02:45 +0200
Subject: [PATCH 6/6] Added template_dir to manifests

---
 flintrock/services.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/flintrock/services.py b/flintrock/services.py
index 00717e6d..f0bf79e9 100644
--- a/flintrock/services.py
+++ b/flintrock/services.py
@@ -111,7 +111,10 @@ def __init__(self, *, version, download_source, template_dir):
         self.version = version
         self.download_source = download_source
         self.template_dir = template_dir
-        self.manifest = {'version': version, 'download_source': download_source}
+        self.manifest = {
+            'version': version,
+            'download_source': download_source,
+            'template_dir': template_dir}
 
     def install(
             self,
@@ -246,6 +249,7 @@ def __init__(
             'spark_executor_instances': spark_executor_instances,
             'hadoop_version': hadoop_version,
             'download_source': download_source,
+            'template_dir': template_dir,
             'git_commit': git_commit,
             'git_repository': git_repository}