From fa6f188c23d69a6bb3a368accb50e977da9a2883 Mon Sep 17 00:00:00 2001 From: timifasubaa <30888507+timifasubaa@users.noreply.github.com> Date: Mon, 25 Jun 2018 12:04:27 -0700 Subject: [PATCH 1/3] specify hive namespace for tables (#5268) (cherry picked from commit bd24f854c96390e53bdfb0a3e5c3122928340acf) --- superset/config.py | 4 ++++ superset/db_engine_specs.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/superset/config.py b/superset/config.py index 458ada644282..4de0ebd46ac9 100644 --- a/superset/config.py +++ b/superset/config.py @@ -315,6 +315,10 @@ class CeleryConfig(object): # contain all the external tables CSV_TO_HIVE_UPLOAD_DIRECTORY = 'EXTERNAL_HIVE_TABLES/' +# The namespace within hive where the tables created from +# uploading CSVs will be stored. +UPLOADED_CSV_HIVE_NAMESPACE = None + # A dictionary of items that gets merged into the Jinja context for # SQL Lab. The existing context gets updated with this dictionary, # meaning values for existing keys get overwritten by the content of this diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index a63bb0461e97..0c37a942b7c8 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -910,6 +910,14 @@ def get_column_names(filepath): return next(unicodecsv.reader(f, encoding='utf-8-sig')) table_name = form.name.data + if config.get('UPLOADED_CSV_HIVE_NAMESPACE'): + if '.' in table_name: + raise Exception( + "You can't specify a namespace. 
" + 'All tables will be uploaded to the `{}` namespace'.format( + config.get('UPLOADED_CSV_HIVE_NAMESPACE'))) + table_name = '{}.{}'.format( + config.get('UPLOADED_CSV_HIVE_NAMESPACE'), table_name) filename = form.csv_file.data.filename bucket_path = app.config['CSV_TO_HIVE_UPLOAD_S3_BUCKET'] From 467c0434bdd4854d42f135ed6dc66572f6496ff2 Mon Sep 17 00:00:00 2001 From: timifasubaa <30888507+timifasubaa@users.noreply.github.com> Date: Fri, 6 Jul 2018 09:46:53 -0700 Subject: [PATCH 2/3] use schema form field in upload csv (#5303) (cherry picked from commit 28ba5a9ddb20ff8129d288481d72c91c579b199e) --- superset/db_engine_specs.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 0c37a942b7c8..f03489262fa2 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -910,17 +910,26 @@ def get_column_names(filepath): return next(unicodecsv.reader(f, encoding='utf-8-sig')) table_name = form.name.data + schema_name = form.schema.data + if config.get('UPLOADED_CSV_HIVE_NAMESPACE'): - if '.' in table_name: + if '.' in table_name or schema_name: raise Exception( "You can't specify a namespace. " 'All tables will be uploaded to the `{}` namespace'.format( config.get('UPLOADED_CSV_HIVE_NAMESPACE'))) table_name = '{}.{}'.format( config.get('UPLOADED_CSV_HIVE_NAMESPACE'), table_name) - filename = form.csv_file.data.filename + else: + if '.' in table_name and schema_name: + raise Exception( + "You can't specify a namespace both in the name of the table " + 'and in the schema field. 
Please remove one') + if schema_name: + table_name = '{}.{}'.format(schema_name, table_name) - bucket_path = app.config['CSV_TO_HIVE_UPLOAD_S3_BUCKET'] + filename = form.csv_file.data.filename + bucket_path = config['CSV_TO_HIVE_UPLOAD_S3_BUCKET'] if not bucket_path: logging.info('No upload bucket specified') From 866e6440b8e59a9a7606dd408ab0647ea12a4f15 Mon Sep 17 00:00:00 2001 From: timifasubaa <30888507+timifasubaa@users.noreply.github.com> Date: Thu, 21 Jun 2018 16:01:34 -0700 Subject: [PATCH 3/3] Update db_engine_specs.py (#5264) (cherry picked from commit 0e5293b9beae7dbef4e40c7f88aa0c3e6c506057) --- superset/db_engine_specs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index f03489262fa2..bf6ec99cbde4 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -950,7 +950,7 @@ def get_column_names(filepath): s3.upload_file( upload_path, bucket_path, os.path.join(upload_prefix, table_name, filename)) - sql = """CREATE EXTERNAL TABLE {table_name} ( {schema_definition} ) + sql = """CREATE TABLE {table_name} ( {schema_definition} ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE LOCATION '{location}' tblproperties ('skip.header.line.count'='1')""".format(**locals())