From c81ab34a22b28213d6b4b0968133251b33bf22fd Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Tue, 9 Jan 2024 10:13:49 -0800 Subject: [PATCH 01/26] parser for CREATE EXTERNAL TABLE --- src/Parsers/ASTCreateExternalTableQuery.cpp | 40 +++++++++++ src/Parsers/ASTCreateExternalTableQuery.h | 25 +++++++ .../ParserCreateExternalTableQuery.cpp | 70 +++++++++++++++++++ src/Parsers/ParserCreateExternalTableQuery.h | 18 +++++ src/Parsers/ParserQueryWithOutput.cpp | 7 +- 5 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 src/Parsers/ASTCreateExternalTableQuery.cpp create mode 100644 src/Parsers/ASTCreateExternalTableQuery.h create mode 100644 src/Parsers/ParserCreateExternalTableQuery.cpp create mode 100644 src/Parsers/ParserCreateExternalTableQuery.h diff --git a/src/Parsers/ASTCreateExternalTableQuery.cpp b/src/Parsers/ASTCreateExternalTableQuery.cpp new file mode 100644 index 00000000000..1f8303d2a76 --- /dev/null +++ b/src/Parsers/ASTCreateExternalTableQuery.cpp @@ -0,0 +1,40 @@ +#include +#include + +namespace DB +{ + +ASTPtr ASTCreateExternalTableQuery::clone() const +{ + auto res = std::make_shared(*this); + + if (settings) + res->set(res->settings, settings->clone()); + + cloneOutputOptions(*res); + cloneTableOptions(*res); + + return res; +} + + void ASTCreateExternalTableQuery::formatQueryImpl(const FormatSettings & fmt_settings, FormatState & state, FormatStateStacked frame) const +{ + auto & ostr = fmt_settings.ostr; + auto hilite = fmt_settings.hilite; + + ostr << (hilite ? hilite_keyword : "") + << "CREATE " + << (create_or_replace ? "OR REPLACE " : "") + << "EXTERNAL STREAM " + << (if_not_exists ? "IF NOT EXISTS " : "") + << (hilite ? hilite_none : "") + << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); + + if (settings) + { + ostr << (hilite ? hilite_keyword : "") << fmt_settings.nl_or_ws << "SETTINGS " << (hilite ? 
hilite_none : ""); + settings->formatImpl(fmt_settings, state, frame); + } +} + +} diff --git a/src/Parsers/ASTCreateExternalTableQuery.h b/src/Parsers/ASTCreateExternalTableQuery.h new file mode 100644 index 00000000000..5c840a9e261 --- /dev/null +++ b/src/Parsers/ASTCreateExternalTableQuery.h @@ -0,0 +1,25 @@ +#include +#include + +namespace DB +{ + +class ASTCreateExternalTableQuery : public ASTQueryWithTableAndOutput +{ +public: + bool create_or_replace {false}; + bool if_not_exists {false}; + + ASTSetQuery * settings; + + String getID(char delim) const override { return "CreateExternalTableQuery" + (delim + getDatabase()) + delim + getTable(); } + + ASTPtr clone() const override; + + QueryKind getQueryKind() const override { return QueryKind::Create; } + +protected: + void formatQueryImpl(const FormatSettings & fmt_settings, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/src/Parsers/ParserCreateExternalTableQuery.cpp b/src/Parsers/ParserCreateExternalTableQuery.cpp new file mode 100644 index 00000000000..189eecb8fd3 --- /dev/null +++ b/src/Parsers/ParserCreateExternalTableQuery.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include "Parsers/ASTIdentifier.h" + +namespace DB +{ + +bool DB::ParserCreateExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected, [[ maybe_unused ]] bool hint) +{ + ParserKeyword s_create("CREATE"); + ParserKeyword s_or_replace("OR REPLACE"); + ParserKeyword s_external_table("EXTERNAL TABLE"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_settings("SETTINGS"); + + ParserCompoundIdentifier table_name_p(true, true); + ParserSetQuery settings_p(/* parse_only_internals_ = */ true); + + ASTPtr table; + ASTPtr settings; + + bool or_replace = false; + bool if_not_exists = false; + + if (!s_create.ignore(pos, expected)) + return false; + + if (s_or_replace.ignore(pos, expected)) + or_replace = true; + + if (s_external_table.ignore(pos, 
expected)) + return false; + + if (!or_replace && s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + + if (!table_name_p.parse(pos, table, expected)) + return false; + + if (s_settings.ignore(pos, expected)) + { + if (!settings_p.parse(pos, settings, expected)) + return false; + } + + auto query = std::make_shared(); + node = query; + + query->create_or_replace = or_replace; + query->if_not_exists = if_not_exists; + + auto * table_id = table->as(); + query->database = table_id->getDatabase(); + query->table = table_id->getTable(); + if (query->database) + query->children.push_back(query->database); + if (query->table) + query->children.push_back(query->table); + + query->settings = settings; + if (query->settings) + query->children.push_back(query->settings); + + return true; +} + +} diff --git a/src/Parsers/ParserCreateExternalTableQuery.h b/src/Parsers/ParserCreateExternalTableQuery.h new file mode 100644 index 00000000000..910807d71f1 --- /dev/null +++ b/src/Parsers/ParserCreateExternalTableQuery.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +namespace DB +{ + +/// Query like this: +/// CREATE [OR REPLACE] EXTERNAL TABLE [IF NOT EXISTS] [db.]name [UUID 'uuid'] +/// [SETTINGS name = value, ...] 
+class ParserCreateExternalTableQuery : public DB::IParserBase +{ +protected: + const char * getName() const override { return "CREATE EXTERNAL TABLE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected, [[ maybe_unused ]] bool hint) override; +}; + +} diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index d1d01a33fb8..dbf04265750 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -24,9 +24,10 @@ #include #include #include -#include "Common/Exception.h" +#include /// proton : starts +#include #include #include #include @@ -64,6 +65,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec /// proton: starts ParserShowFormatSchemasQuery show_format_schemas_p; ParserShowCreateFormatSchemaQuery show_create_format_schema_p; + ParserCreateExternalTableQuery create_external_table_p; /// proton: ends ASTPtr query; @@ -84,6 +86,9 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || describe_table_p.parse(pos, query, expected) || show_processlist_p.parse(pos, query, expected) || create_p.parse(pos, query, expected) + /// proton: starts + || create_external_table_p.parse(pos, query, expected) + /// proton: ends || alter_p.parse(pos, query, expected) || rename_p.parse(pos, query, expected) || drop_p.parse(pos, query, expected) From 95e2a11159365d0e02b26e1b63d9a6cefb30b86c Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 14 Jan 2024 01:58:49 -0800 Subject: [PATCH 02/26] basic CREATE/DROP EXTERNAL TABLE --- src/CMakeLists.txt | 2 + src/Interpreters/InterpreterDropQuery.cpp | 15 ++++++ src/Parsers/ASTCreateExternalTableQuery.cpp | 40 -------------- src/Parsers/ASTCreateExternalTableQuery.h | 25 --------- src/Parsers/ASTCreateQuery.cpp | 5 +- src/Parsers/ASTDropQuery.h | 4 ++ .../ParserCreateExternalTableQuery.cpp | 37 ++++++------- src/Parsers/ParserCreateExternalTableQuery.h | 2 +- 
src/Parsers/ParserDropExternalTableQuery.cpp | 52 +++++++++++++++++++ src/Parsers/ParserDropExternalTableQuery.h | 17 ++++++ src/Parsers/ParserQueryWithOutput.cpp | 7 ++- src/Storages/ExternalTable/CMakeLists.txt | 11 ++++ .../ExternalTable/ExternalTableSettings.cpp | 39 ++++++++++++++ .../ExternalTable/ExternalTableSettings.h | 26 ++++++++++ .../ExternalTable/StorageExternalTable.cpp | 43 +++++++++++++++ .../ExternalTable/StorageExternalTable.h | 33 ++++++++++++ src/Storages/IStorage.h | 2 + src/Storages/registerStorages.cpp | 2 + 18 files changed, 276 insertions(+), 86 deletions(-) delete mode 100644 src/Parsers/ASTCreateExternalTableQuery.cpp delete mode 100644 src/Parsers/ASTCreateExternalTableQuery.h create mode 100644 src/Parsers/ParserDropExternalTableQuery.cpp create mode 100644 src/Parsers/ParserDropExternalTableQuery.h create mode 100644 src/Storages/ExternalTable/CMakeLists.txt create mode 100644 src/Storages/ExternalTable/ExternalTableSettings.cpp create mode 100644 src/Storages/ExternalTable/ExternalTableSettings.h create mode 100644 src/Storages/ExternalTable/StorageExternalTable.cpp create mode 100644 src/Storages/ExternalTable/StorageExternalTable.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 97bf7b74a79..5d061ec3cdb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -248,6 +248,7 @@ else() endif() add_subdirectory(Storages/ExternalStream) +add_subdirectory(Storages/ExternalTable) # proton: end set (DBMS_COMMON_LIBRARIES) @@ -264,6 +265,7 @@ endif() target_link_libraries (dbms PRIVATE klog) target_link_libraries (dbms PRIVATE external_stream) +target_link_libraries (dbms PRIVATE external_table) target_link_libraries (dbms PRIVATE checkpoint) set (all_modules dbms) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 57a6267b223..72d9622ec14 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -120,6 +120,11 @@ BlockIO 
InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue throw Exception(ErrorCodes::INCORRECT_QUERY, "It {} is not a Dictionary", table_id.getNameForLogs()); /// proton: ends + /// proton: starts + if (ast_drop_query.is_external_table && !table->isExternalTable()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "It {} is not a External Table", table_id.getNameForLogs()); + /// proton: ends + /// Now get UUID, so we can wait for table data to be finally dropped table_id.uuid = database->tryGetTableUUID(table_id.table_name); @@ -137,6 +142,11 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue if (query.kind == ASTDropQuery::Kind::Detach) { + /// proton: starts + if (table->isExternalTable()) + throw Exception("Cannot DETACH external table", ErrorCodes::SYNTAX_ERROR); + /// proton: ends + context_->checkAccess(drop_storage, table_id); if (table->isDictionary()) @@ -174,6 +184,11 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue } else if (query.kind == ASTDropQuery::Kind::Truncate) { + /// proton: starts + if (table->isExternalTable()) + throw Exception("Cannot TRUNCATE external table", ErrorCodes::SYNTAX_ERROR); + /// proton: ends + if (table->isDictionary()) throw Exception("Cannot TRUNCATE dictionary", ErrorCodes::SYNTAX_ERROR); diff --git a/src/Parsers/ASTCreateExternalTableQuery.cpp b/src/Parsers/ASTCreateExternalTableQuery.cpp deleted file mode 100644 index 1f8303d2a76..00000000000 --- a/src/Parsers/ASTCreateExternalTableQuery.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include -#include - -namespace DB -{ - -ASTPtr ASTCreateExternalTableQuery::clone() const -{ - auto res = std::make_shared(*this); - - if (settings) - res->set(res->settings, settings->clone()); - - cloneOutputOptions(*res); - cloneTableOptions(*res); - - return res; -} - - void ASTCreateExternalTableQuery::formatQueryImpl(const FormatSettings & fmt_settings, FormatState & state, FormatStateStacked frame) const -{ - auto 
& ostr = fmt_settings.ostr; - auto hilite = fmt_settings.hilite; - - ostr << (hilite ? hilite_keyword : "") - << "CREATE " - << (create_or_replace ? "OR REPLACE " : "") - << "EXTERNAL STREAM " - << (if_not_exists ? "IF NOT EXISTS " : "") - << (hilite ? hilite_none : "") - << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable()); - - if (settings) - { - ostr << (hilite ? hilite_keyword : "") << fmt_settings.nl_or_ws << "SETTINGS " << (hilite ? hilite_none : ""); - settings->formatImpl(fmt_settings, state, frame); - } -} - -} diff --git a/src/Parsers/ASTCreateExternalTableQuery.h b/src/Parsers/ASTCreateExternalTableQuery.h deleted file mode 100644 index 5c840a9e261..00000000000 --- a/src/Parsers/ASTCreateExternalTableQuery.h +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include - -namespace DB -{ - -class ASTCreateExternalTableQuery : public ASTQueryWithTableAndOutput -{ -public: - bool create_or_replace {false}; - bool if_not_exists {false}; - - ASTSetQuery * settings; - - String getID(char delim) const override { return "CreateExternalTableQuery" + (delim + getDatabase()) + delim + getTable(); } - - ASTPtr clone() const override; - - QueryKind getQueryKind() const override { return QueryKind::Create; } - -protected: - void formatQueryImpl(const FormatSettings & fmt_settings, FormatState & state, FormatStateStacked frame) const override; -}; - -} diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index c76fd41b0ad..608349d77e9 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -36,7 +36,8 @@ ASTPtr ASTStorage::clone() const void ASTStorage::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { - if (engine) + if (engine + /*proton: starts*/ && engine->name != "ExternalTable" /*proton: ends*/) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "ENGINE" << (s.hilite ? 
hilite_none : "") << " = "; engine->formatImpl(s, state, frame); @@ -277,6 +278,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat what = "MATERIALIZED VIEW"; else if (is_random) what = "RANDOM STREAM"; + else if (storage && storage->engine && storage->engine->name == "ExternalTable") + what = "EXTERNAL TABLE"; /// proton: ends. settings.ostr diff --git a/src/Parsers/ASTDropQuery.h b/src/Parsers/ASTDropQuery.h index 2e67eaf3692..ffda9138fa2 100644 --- a/src/Parsers/ASTDropQuery.h +++ b/src/Parsers/ASTDropQuery.h @@ -33,6 +33,10 @@ class ASTDropQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnClu bool no_delay{false}; + /// proton: starts + bool is_external_table{false}; + /// proton: ends + // We detach the object permanently, so it will not be reattached back during server restart. bool permanently{false}; diff --git a/src/Parsers/ParserCreateExternalTableQuery.cpp b/src/Parsers/ParserCreateExternalTableQuery.cpp index 189eecb8fd3..5d67859a9ba 100644 --- a/src/Parsers/ParserCreateExternalTableQuery.cpp +++ b/src/Parsers/ParserCreateExternalTableQuery.cpp @@ -1,9 +1,9 @@ -#include #include -#include +#include +#include +#include #include #include -#include "Parsers/ASTIdentifier.h" namespace DB { @@ -31,7 +31,7 @@ bool DB::ParserCreateExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Exp if (s_or_replace.ignore(pos, expected)) or_replace = true; - if (s_external_table.ignore(pos, expected)) + if (!s_external_table.ignore(pos, expected)) return false; if (!or_replace && s_if_not_exists.ignore(pos, expected)) @@ -46,23 +46,24 @@ bool DB::ParserCreateExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Exp return false; } - auto query = std::make_shared(); - node = query; + auto create_query = std::make_shared(); + node = create_query; - query->create_or_replace = or_replace; - query->if_not_exists = if_not_exists; + create_query->create_or_replace = or_replace; + create_query->if_not_exists = if_not_exists; 
auto * table_id = table->as(); - query->database = table_id->getDatabase(); - query->table = table_id->getTable(); - if (query->database) - query->children.push_back(query->database); - if (query->table) - query->children.push_back(query->table); - - query->settings = settings; - if (query->settings) - query->children.push_back(query->settings); + create_query->database = table_id->getDatabase(); + create_query->table = table_id->getTable(); + if (create_query->database) + create_query->children.push_back(create_query->database); + if (create_query->table) + create_query->children.push_back(create_query->table); + + auto storage = std::make_shared(); + storage->set(storage->engine, makeASTFunction("ExternalTable")); + storage->set(storage->settings, settings); + create_query->set(create_query->storage, storage); return true; } diff --git a/src/Parsers/ParserCreateExternalTableQuery.h b/src/Parsers/ParserCreateExternalTableQuery.h index 910807d71f1..be45586bb33 100644 --- a/src/Parsers/ParserCreateExternalTableQuery.h +++ b/src/Parsers/ParserCreateExternalTableQuery.h @@ -6,7 +6,7 @@ namespace DB { /// Query like this: -/// CREATE [OR REPLACE] EXTERNAL TABLE [IF NOT EXISTS] [db.]name [UUID 'uuid'] +/// CREATE [OR REPLACE] EXTERNAL TABLE [IF NOT EXISTS] [db.]name /// [SETTINGS name = value, ...] 
class ParserCreateExternalTableQuery : public DB::IParserBase { diff --git a/src/Parsers/ParserDropExternalTableQuery.cpp b/src/Parsers/ParserDropExternalTableQuery.cpp new file mode 100644 index 00000000000..8b1ac9a4f17 --- /dev/null +++ b/src/Parsers/ParserDropExternalTableQuery.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool DB::ParserDropExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected, [[ maybe_unused ]] bool hint) +{ + ParserKeyword s_drop("DROP"); + ParserKeyword s_external_table("EXTERNAL TABLE"); + ParserKeyword s_if_exists("IF EXISTS"); + + ParserCompoundIdentifier table_name_p(true, true); + + ASTPtr table; + + bool if_exists = false; + + if (!s_drop.ignore(pos, expected)) + return false; + + if (!s_external_table.ignore(pos, expected)) + return false; + + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + + if (!table_name_p.parse(pos, table, expected)) + return false; + + auto query = std::make_shared(); + node = query; + + query->kind = ASTDropQuery::Drop; + query->is_external_table = true; + query->if_exists = if_exists; + + auto * table_id = table->as(); + query->database = table_id->getDatabase(); + query->table = table_id->getTable(); + if (query->database) + query->children.push_back(query->database); + if (query->table) + query->children.push_back(query->table); + + return true; +} + +} diff --git a/src/Parsers/ParserDropExternalTableQuery.h b/src/Parsers/ParserDropExternalTableQuery.h new file mode 100644 index 00000000000..5da057c5977 --- /dev/null +++ b/src/Parsers/ParserDropExternalTableQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace DB +{ + +/// Query like this: +/// DROP EXTERNAL TABLE [IF EXISTS] [db.]name +class ParserDropExternalTableQuery : public DB::IParserBase +{ +protected: + const char * getName() const override { return "DROP EXTERNAL TABLE query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected, [[ 
maybe_unused ]] bool hint) override; +}; + +} diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index dbf04265750..485b2874103 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -28,6 +28,7 @@ /// proton : starts #include +#include #include #include #include @@ -52,6 +53,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserAlterQuery alter_p; ParserRenameQuery rename_p; ParserDropQuery drop_p; + ParserDropExternalTableQuery drop_external_table_p; ParserCheckQuery check_p; ParserOptimizeQuery optimize_p; ParserKillQueryQuery kill_query_p; @@ -85,13 +87,16 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || describe_cache_p.parse(pos, query, expected) || describe_table_p.parse(pos, query, expected) || show_processlist_p.parse(pos, query, expected) - || create_p.parse(pos, query, expected) /// proton: starts || create_external_table_p.parse(pos, query, expected) /// proton: ends + || create_p.parse(pos, query, expected) || alter_p.parse(pos, query, expected) || rename_p.parse(pos, query, expected) || drop_p.parse(pos, query, expected) + /// proton: starts + || drop_external_table_p.parse(pos, query, expected) + /// proton: ends || check_p.parse(pos, query, expected) || kill_query_p.parse(pos, query, expected) || optimize_p.parse(pos, query, expected) diff --git a/src/Storages/ExternalTable/CMakeLists.txt b/src/Storages/ExternalTable/CMakeLists.txt new file mode 100644 index 00000000000..c0af182a69d --- /dev/null +++ b/src/Storages/ExternalTable/CMakeLists.txt @@ -0,0 +1,11 @@ +include("${proton_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") + +add_headers_and_sources(external_table .) 
+ +add_library(external_table ${external_table_headers} ${external_table_sources}) + +target_link_libraries(external_table PUBLIC clickhouse_parsers) + +# if (ENABLE_TESTS) +# add_subdirectory(tests) +# endif () diff --git a/src/Storages/ExternalTable/ExternalTableSettings.cpp b/src/Storages/ExternalTable/ExternalTableSettings.cpp new file mode 100644 index 00000000000..103b1a7e564 --- /dev/null +++ b/src/Storages/ExternalTable/ExternalTableSettings.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(ExternalTableSettingsTraits, LIST_OF_EXTERNAL_TABLE_SETTINGS) + +void ExternalTableSettings::loadFromQuery(ASTStorage & storage) +{ + if (storage.settings) + { + try + { + applyChanges(storage.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage.set(storage.settings, settings_ast); + } +} + +} diff --git a/src/Storages/ExternalTable/ExternalTableSettings.h b/src/Storages/ExternalTable/ExternalTableSettings.h new file mode 100644 index 00000000000..546f7bb82b1 --- /dev/null +++ b/src/Storages/ExternalTable/ExternalTableSettings.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTStorage; + +#define LIST_OF_EXTERNAL_TABLE_SETTINGS(M) \ + M(String, type, "", "External table type", 0) \ + /* ClickHouse settings */ \ + M(String, address, "", "The address of the ClickHouse server to connect", 0) \ + M(String, table, "", "The ClickHouse table to which the external table is mapped", 0) + +DECLARE_SETTINGS_TRAITS(ExternalTableSettingsTraits, LIST_OF_EXTERNAL_TABLE_SETTINGS) + + +/// Settings for the ExternalTable engine. +/// Could be loaded from a CREATE EXTERNAL TABLE query (SETTINGS clause). 
+struct ExternalTableSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp new file mode 100644 index 00000000000..060e78c8db2 --- /dev/null +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +namespace DB +{ + +StorageExternalTable::StorageExternalTable( + const StorageID & table_id_, + std::unique_ptr /*external_table_settings_*/, + ContextPtr context_) +: IStorage(table_id_) +, WithContext(context_->getGlobalContext()) +{ + std::cout << "External table created" << std::endl; +} + +void registerStorageExternalTable(StorageFactory & factory) +{ + auto creator_fn = [](const StorageFactory::Arguments & args) + { + if (args.storage_def->settings) + { + auto settings = std::make_unique(); + settings->loadFromQuery(*args.storage_def); + + return StorageExternalTable::create(args.table_id, std::move(settings), args.getContext()); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External table requires correct settings setup"); + }; + + factory.registerStorage( + "ExternalTable", + creator_fn, + StorageFactory::StorageFeatures{ + .supports_settings = true, + .supports_schema_inference = true, + }); +} + +} diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h new file mode 100644 index 00000000000..b232484ef1d --- /dev/null +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -0,0 +1,33 @@ +#pragma once + +#include "Interpreters/Context_fwd.h" +#include "Storages/ExternalTable/ExternalTableSettings.h" +#include "Storages/IStorage.h" +#include "base/shared_ptr_helper.h" + +namespace DB +{ + +class StorageExternalTable final : public shared_ptr_helper, public IStorage, public WithContext +{ + friend struct shared_ptr_helper; + +public: + String getName() const override { return 
"ExternalTable"; } + + bool isRemote() const override { return true; } + bool isExternalTable() const override { return true; } + + /// FIXME + void startup() override { std::cout << "ExternalTable startup" << std::endl; } + /// FIXME + void shutdown() override { std::cout << "ExternalTable shutdown" << std::endl; } + +protected: + StorageExternalTable( + const StorageID & table_id_, + std::unique_ptr external_table_settings_, + ContextPtr context_); +}; + +} diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f746c7eabc4..8a64ed71cc2 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -234,6 +234,8 @@ class IStorage : public std::enable_shared_from_this, public TypePromo virtual bool supportsAccurateSeekTo() const noexcept { return false; } virtual bool supportsStreamingQuery() const { return false; } + + virtual bool isExternalTable() const { return false; } /// proton: ends. /// Return list of virtual columns (like _part, _table, etc). In the vast diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 11ebaa2f61e..20544c5fb13 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -20,6 +20,7 @@ void registerStorageView(StorageFactory & factory); /// proton: starts. void registerStorageStream(StorageFactory & factory); void registerStorageMaterializedView(StorageFactory & factory); +void registerStorageExternalTable(StorageFactory & factory); void registerStorageExternalStream(StorageFactory & factory); void registerStorageRandom(StorageFactory & factory); /// proton: ends. @@ -59,6 +60,7 @@ void registerStorages() /// proton: starts. registerStorageStream(factory); registerStorageMaterializedView(factory); + registerStorageExternalTable(factory); registerStorageExternalStream(factory); registerStorageRandom(factory); /// proton: ends. 
From 3d4add531b2239eb23f02749f7cb073b305ed511 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Wed, 17 Jan 2024 13:11:37 -0800 Subject: [PATCH 03/26] DB::Client --- src/Client/Client.cpp | 208 ++++++++++++++++++ src/Client/Client.h | 45 ++++ src/Storages/ExternalTable/CMakeLists.txt | 2 +- .../ExternalTable/ClickHouse/ClickHouse.cpp | 65 ++++++ .../ExternalTable/ClickHouse/ClickHouse.h | 35 +++ .../ExternalTable/ExternalTableFactory.cpp | 33 +++ .../ExternalTable/ExternalTableFactory.h | 25 +++ .../ExternalTable/ExternalTableImpl.h | 25 +++ .../ExternalTable/ExternalTableSettings.h | 2 + .../ExternalTable/StorageExternalTable.cpp | 21 +- .../ExternalTable/StorageExternalTable.h | 35 ++- 11 files changed, 484 insertions(+), 12 deletions(-) create mode 100644 src/Client/Client.cpp create mode 100644 src/Client/Client.h create mode 100644 src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp create mode 100644 src/Storages/ExternalTable/ClickHouse/ClickHouse.h create mode 100644 src/Storages/ExternalTable/ExternalTableFactory.cpp create mode 100644 src/Storages/ExternalTable/ExternalTableFactory.h create mode 100644 src/Storages/ExternalTable/ExternalTableImpl.h diff --git a/src/Client/Client.cpp b/src/Client/Client.cpp new file mode 100644 index 00000000000..0d9929809f0 --- /dev/null +++ b/src/Client/Client.cpp @@ -0,0 +1,208 @@ +#include +#include "Processors/Chunk.h" + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int DEADLOCK_AVOIDED; +extern const int TIMEOUT_EXCEEDED; +extern const int UNKNOWN_PACKET_FROM_SERVER; +} + +Client::Client(IConnectionPool::Entry connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_) + : connection(connection_) + , timeouts(timeouts_) + , context(context_) + , logger(logger_) +{} + +void Client::executeQuery(String query, const Callbacks & callbacks) +{ + size_t processed_rows {0}; + int retries_left = 10; + while (retries_left) + { + try + { + connection->sendQuery( + timeouts, + query, + 
{}, + "", + QueryProcessingStage::Complete, + nullptr, + nullptr, + true); + + receiveResult(callbacks); + + break; + } + catch (const Exception & e) + { + /// Retry when the server said "Client should retry" and no rows + /// has been received yet. + if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left) + LOG_ERROR(logger, "Got a transient error from the server, will retry ({} retries left)", retries_left); + else + throw; + } + } +} + +/// Receives and processes packets coming from server. +/// Also checks if query execution should be cancelled. +void Client::receiveResult(const Callbacks & callbacks) +{ + const auto receive_timeout = timeouts.receive_timeout; + constexpr size_t default_poll_interval = 1000000; /// in microseconds + constexpr size_t min_poll_interval = 5000; /// in microseconds + const size_t poll_interval + = std::max(min_poll_interval, std::min(receive_timeout.totalMicroseconds(), default_poll_interval)); + + while (true) + { + Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE); + + while (true) + { + /// Has the Ctrl+C been pressed and thus the query should be cancelled? + /// If this is the case, inform the server about it and receive the remaining packets + /// to avoid losing sync. + if (!cancelled) + { + double elapsed = receive_watch.elapsedSeconds(); + if (elapsed > receive_timeout.totalSeconds()) + { + cancelQuery(); + + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded while receiving data from server. Waited for {} seconds, timeout is {} seconds", static_cast(elapsed), receive_timeout.totalSeconds()); + + } + } + + /// Poll for changes after a cancellation check, otherwise it never reached + /// because of progress updates from server. 
+ + if (connection->poll(poll_interval)) + break; + } + + if (!receiveAndProcessPacket(cancelled, callbacks)) + break; + } + + if (cancelled) + LOG_INFO(logger, "Query was cancelled."); +} + +void Client::cancelQuery() +{ + connection->sendCancel(); + cancelled = true; +} + +/// Receive a part of the result, or progress info or an exception and process it. +/// Returns true if one should continue receiving packets. +/// Output of result is suppressed if query was cancelled. +bool Client::receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks) +{ + Packet packet = connection->receivePacket(); + + Chunk chunk {}; + + switch (packet.type) + { + case Protocol::Server::PartUUIDs: + return true; + + case Protocol::Server::Data: + if (!cancelled_) + callbacks.on_data(packet.block); + return true; + + case Protocol::Server::Progress: + callbacks.on_progress(packet.progress); + return true; + + case Protocol::Server::ProfileInfo: + callbacks.on_profile_info(packet.profile_info); + return true; + + case Protocol::Server::Totals: + if (!cancelled_) + callbacks.on_totals(packet.block); + return true; + + case Protocol::Server::Extremes: + if (!cancelled_) + callbacks.on_extremes(packet.block); + return true; + + case Protocol::Server::Exception: + callbacks.on_receive_exception_from_server(std::move(packet.exception)); + return false; + + case Protocol::Server::Log: + callbacks.on_log_data(packet.block); + return true; + + case Protocol::Server::EndOfStream: + callbacks.on_end_of_stream(); + return false; + + case Protocol::Server::ProfileEvents: + callbacks.on_profile_events(packet.block); + return true; + + default: + throw Exception( + ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); + } +} + +// void Client::onProgress(const Progress & value) +// { +// LOG_INFO(logger, "onProgress called with read_rows = {}", value.read_rows); +// } +// +// void Client::onData(Block & block) +// { +// /// 
TBD +// } +// +// void Client::onLogData(Block & block) { +// LOG_INFO(logger, "onLogData called with columns = {}, rows = {}", block.columns(), block.rows()); +// } +// +// void Client::onTotals(Block & block) +// { +// LOG_INFO(logger, "onTotals called with columns = {}, rows = {}", block.columns(), block.rows()); +// } +// +// void Client::onExtremes(Block & block) +// { +// LOG_INFO(logger, "onExtremes called with columns = {}, rows = {}", block.columns(), block.rows()); +// } +// +// void Client::onReceiveExceptionFromServer(std::unique_ptr && e) +// { +// LOG_INFO(logger, "received server exception: {}", e->what()); +// } +// +// void Client::onProfileInfo(const ProfileInfo & profile_info) +// { +// LOG_INFO(logger, "received ProfileInfo: rows={}", profile_info.rows); +// } +// void Client::onEndOfStream() +// { +// LOG_INFO(logger, "received EndOfStream"); +// } +// void Client::onProfileEvents(Block & block) +// { +// LOG_INFO(logger, "received ProfileEvents rows = {}", block.rows()); +// } + +} diff --git a/src/Client/Client.h b/src/Client/Client.h new file mode 100644 index 00000000000..99e2b39c5d3 --- /dev/null +++ b/src/Client/Client.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +struct Callbacks +{ + std::function on_progress; + std::function on_data; + std::function on_log_data; + std::function on_totals; + std::function on_extremes; + std::function && e)> on_receive_exception_from_server; + std::function on_profile_info; + std::function on_end_of_stream; + std::function on_profile_events; +}; + +/// Client is for using as a library client without all the complexities for handling terminal stuff like ClientBase does. 
+class Client final +{ +public: + Client(IConnectionPool::Entry connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_); + + void executeQuery(String query, const Callbacks & callbacks); + void cancelQuery(); + +private: + void receiveResult(const Callbacks & callbacks); + bool receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks); + + IConnectionPool::Entry connection; + ConnectionTimeouts timeouts; + + std::atomic_bool cancelled {false}; + + ContextPtr & context; + Poco::Logger * logger; +}; + +} diff --git a/src/Storages/ExternalTable/CMakeLists.txt b/src/Storages/ExternalTable/CMakeLists.txt index c0af182a69d..bd39e1ba864 100644 --- a/src/Storages/ExternalTable/CMakeLists.txt +++ b/src/Storages/ExternalTable/CMakeLists.txt @@ -4,7 +4,7 @@ add_headers_and_sources(external_table .) add_library(external_table ${external_table_headers} ${external_table_sources}) -target_link_libraries(external_table PUBLIC clickhouse_parsers) +target_link_libraries(external_table PUBLIC clickhouse_parsers ch_contrib::abseil_swiss_tables) # if (ENABLE_TESTS) # add_subdirectory(tests) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp new file mode 100644 index 00000000000..f0cfc8fed6e --- /dev/null +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -0,0 +1,65 @@ +#include +#include "Client/Client.h" + +namespace DB +{ + +namespace ExternalTable +{ + +ClickHouse::ClickHouse(ExternalTableSettingsPtr settings, ContextPtr & context_) + : table(settings->table.value) + , context(context_) + , logger(&Poco::Logger::get("External-" + settings->address.value + "-" + settings->table.value)) +{ + assert(settings->type.value == "clickhouse"); + + auto addr = settings->address.value; + auto pos = addr.find_first_of(':'); + if (pos == String::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid ClickHouse address, expected format ':'"); + auto host = 
addr.substr(0, pos); + auto port = std::stoi(addr.substr(pos + 1)); + if (!port) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid port in ClickHouse address"); + + connection_pool = ConnectionPoolFactory::instance().get( + 100 /*max_connections*/, + host, port, + "default" /*default_database*/, + "default" /*user*/, + "" /*password*/, + "" /*quota_key*/, + "" /*cluster*/, + "" /*cluster_secret*/, + "Timeplus Proton" /*client_name*/, + Protocol::Compression::Enable, + Protocol::Secure::Disable, + 0 /*priority*/); + + timeouts = ConnectionTimeouts( + 10 * 60 * 1'000'000 /*connection_timeout_*/, + 10 * 60 * 1'000'000 /*send_timeout_*/, + 10 * 60 * 1'000'000 /*receive_timeout_*/ + ); +} + +void ClickHouse::startup() +{ + NameToNameMap map; + auto conn = connection_pool->get(timeouts); + Client client {conn, timeouts, context, logger}; + client.executeQuery("DESCRIBE TABLE " + table, { + .on_data = [log = logger](Block & block) + { + LOG_INFO(log, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); + for (size_t i = 0; i < block.rows(); ++i) + { + } + } + }); +} + +} + +} diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h new file mode 100644 index 00000000000..691c7c54c91 --- /dev/null +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +namespace ExternalTable +{ + +class ClickHouse final : public IExternalTable +{ +public: + explicit ClickHouse(ExternalTableSettingsPtr settings, ContextPtr & context_); + + void startup() override; + void shutdown() override {} + + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; + +private: + ConnectionPoolPtr connection_pool; + ConnectionTimeouts timeouts; + + String table; + + ContextPtr & context; + Poco::Logger * logger; +}; + +} + +} diff --git 
a/src/Storages/ExternalTable/ExternalTableFactory.cpp b/src/Storages/ExternalTable/ExternalTableFactory.cpp new file mode 100644 index 00000000000..dcc5bba72d8 --- /dev/null +++ b/src/Storages/ExternalTable/ExternalTableFactory.cpp @@ -0,0 +1,33 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int UNKNOWN_TYPE; +} + +ExternalTableFactory & ExternalTableFactory::instance() +{ + static DB::ExternalTableFactory ret; + return ret; +} + +void ExternalTableFactory::registerExternalTable(const std::string & type, Creator creator) +{ + if (creators.contains(type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ExternalTableFactory: type {} is already registered", type); + + creators[type] = std::move(creator); +} + +IExternalTablePtr ExternalTableFactory::getExternalTable(const std::string & type, ExternalTableSettingsPtr settings) const +{ + if (!creators.contains(type)) + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown external table type {}", type); + + return creators.at(type)(std::move(settings)); +} + +} diff --git a/src/Storages/ExternalTable/ExternalTableFactory.h b/src/Storages/ExternalTable/ExternalTableFactory.h new file mode 100644 index 00000000000..e8cc52781ac --- /dev/null +++ b/src/Storages/ExternalTable/ExternalTableFactory.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include "Storages/ExternalTable/ExternalTableImpl.h" +#include "Storages/ExternalTable/ExternalTableSettings.h" + +namespace DB +{ + +/// Allows to create an IExternalTable by the name of its type. 
+class ExternalTableFactory final : private boost::noncopyable +{ +public: + static ExternalTableFactory & instance(); + + using Creator = std::function; + + IExternalTablePtr getExternalTable(const std::string & type, ExternalTableSettingsPtr settings) const; + void registerExternalTable(const std::string & type, Creator creator); + +private: + std::unordered_map creators; +}; + +} diff --git a/src/Storages/ExternalTable/ExternalTableImpl.h b/src/Storages/ExternalTable/ExternalTableImpl.h new file mode 100644 index 00000000000..f556c83cbf8 --- /dev/null +++ b/src/Storages/ExternalTable/ExternalTableImpl.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/// The interface for an External Table implementation to implement. +class IExternalTable +{ +public: + virtual ~IExternalTable() = default; + + virtual void startup() = 0; + virtual void shutdown() = 0; + + virtual SinkToStoragePtr write(const ASTPtr & /* query */, const StorageMetadataPtr & /* metadata_snapshot */, ContextPtr /* context */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Ingesting data to this type of external stream is not supported"); +} +}; + +using IExternalTablePtr = std::unique_ptr; + +} diff --git a/src/Storages/ExternalTable/ExternalTableSettings.h b/src/Storages/ExternalTable/ExternalTableSettings.h index 546f7bb82b1..ca983f9547d 100644 --- a/src/Storages/ExternalTable/ExternalTableSettings.h +++ b/src/Storages/ExternalTable/ExternalTableSettings.h @@ -23,4 +23,6 @@ struct ExternalTableSettings : public BaseSettings void loadFromQuery(ASTStorage & storage_def); }; +using ExternalTableSettingsPtr = std::unique_ptr; + } diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index 060e78c8db2..1e10a0d7327 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -1,19 +1,35 @@ +#include +#include +#include #include #include #include #include 
+#include "Storages/ExternalTable/ClickHouse/ClickHouse.h" namespace DB { StorageExternalTable::StorageExternalTable( const StorageID & table_id_, - std::unique_ptr /*external_table_settings_*/, + std::unique_ptr settings, ContextPtr context_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) { - std::cout << "External table created" << std::endl; + auto type = settings->type.value; + if (type == "clickhouse") + external_table = std::make_unique(std::move(settings)); + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown external table type: {}", type); +} + +SinkToStoragePtr StorageExternalTable::write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context_) +{ + return external_table->write(query, metadata_snapshot, context_); } void registerStorageExternalTable(StorageFactory & factory) @@ -40,4 +56,5 @@ void registerStorageExternalTable(StorageFactory & factory) }); } + } diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index b232484ef1d..87bcecd8dfb 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -1,9 +1,10 @@ #pragma once -#include "Interpreters/Context_fwd.h" -#include "Storages/ExternalTable/ExternalTableSettings.h" -#include "Storages/IStorage.h" -#include "base/shared_ptr_helper.h" +#include +#include +#include +#include +#include namespace DB { @@ -18,16 +19,32 @@ class StorageExternalTable final : public shared_ptr_helperstartup(); } + void shutdown() override { external_table->shutdown(); } + + SinkToStoragePtr write( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + ContextPtr /*context*/) override; protected: StorageExternalTable( const StorageID & table_id_, - std::unique_ptr external_table_settings_, + std::unique_ptr settings, ContextPtr context_); + +private: + IExternalTablePtr external_table; + + /// TBD + // Pipe 
read( + // const Names & /*column_names*/, + // const StorageSnapshotPtr & /*storage_snapshot*/, + // SelectQueryInfo & /*query_info*/, + // ContextPtr /*context*/, + // QueryProcessingStage::Enum /*processed_stage*/, + // size_t /*max_block_size*/, + // size_t /*num_streams*/) override; }; } From 458dc866176c6c9fb586f42b5c25a70b8c1e1648 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Wed, 24 Jan 2024 18:41:59 -0800 Subject: [PATCH 04/26] debug --- src/Client/Client.h | 2 +- src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp | 7 +++++++ src/Storages/ExternalTable/StorageExternalTable.cpp | 5 ++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Client/Client.h b/src/Client/Client.h index 99e2b39c5d3..4d5802e2ba0 100644 --- a/src/Client/Client.h +++ b/src/Client/Client.h @@ -38,7 +38,7 @@ class Client final std::atomic_bool cancelled {false}; - ContextPtr & context; + ContextPtr & context [[maybe_unused]]; Poco::Logger * logger; }; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index f0cfc8fed6e..31dd5ae59df 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -53,8 +53,15 @@ void ClickHouse::startup() .on_data = [log = logger](Block & block) { LOG_INFO(log, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); + auto cols = block.getColumns(); for (size_t i = 0; i < block.rows(); ++i) { + String msg = "row " + std::to_string(i) + " :"; + for (const auto & col : cols) + { + msg += col->getName() + ": "; + msg += (*col)[i].getTypeName(); + } } } }); diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index 1e10a0d7327..5d7ecbdbd8d 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -19,7 +19,10 @@ 
StorageExternalTable::StorageExternalTable( { auto type = settings->type.value; if (type == "clickhouse") - external_table = std::make_unique(std::move(settings)); + { + auto ctx = getContext(); + external_table = std::make_unique(std::move(settings), ctx); + } else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown external table type: {}", type); } From b36cb9aaa9013d18a24fd8bed856a3bb2be4b6f3 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Fri, 26 Jan 2024 15:28:12 -0800 Subject: [PATCH 05/26] storage memory metadata --- src/Client/Connection.cpp | 25 ++++ src/Client/Connection.h | 12 ++ src/Client/{Client.cpp => LibClient.cpp} | 54 ++++---- src/Client/{Client.h => LibClient.h} | 10 +- src/Common/IFactoryWithAliases.h | 47 +++++++ .../ClickHouseDataTypeTranslator.cpp | 118 ++++++++++++++++++ src/DataTypes/ClickHouseDataTypeTranslator.h | 23 ++++ src/DataTypes/DataTypeAggregateFunction.cpp | 2 + src/DataTypes/DataTypeArray.cpp | 2 + .../DataTypeCustomSimpleAggregateFunction.cpp | 2 + src/DataTypes/DataTypeDate.cpp | 2 + src/DataTypes/DataTypeDomainBool.cpp | 2 + src/DataTypes/DataTypeEnum.cpp | 4 + src/DataTypes/DataTypeFactory.cpp | 18 +-- src/DataTypes/DataTypeFactory.h | 7 +- src/DataTypes/DataTypeFixedString.cpp | 2 + src/DataTypes/DataTypeIPv4andIPv6.cpp | 3 + src/DataTypes/DataTypeInterval.cpp | 12 ++ src/DataTypes/DataTypeLowCardinality.cpp | 2 + src/DataTypes/DataTypeMap.cpp | 2 + src/DataTypes/DataTypeNested.cpp | 4 +- src/DataTypes/DataTypeNothing.cpp | 2 + src/DataTypes/DataTypeNullable.cpp | 2 + src/DataTypes/DataTypeString.cpp | 2 + src/DataTypes/DataTypeTuple.cpp | 2 + src/DataTypes/DataTypeUUID.cpp | 2 + src/DataTypes/DataTypesDecimal.cpp | 7 ++ src/DataTypes/DataTypesNumber.cpp | 28 +++++ src/DataTypes/IDataTypeTranslator.h | 14 +++ src/DataTypes/registerDataTypeDateTime.cpp | 4 + src/Formats/NativeReader.cpp | 8 +- src/Formats/NativeReader.h | 12 ++ src/Storages/ExternalTable/CMakeLists.txt | 1 + .../ExternalTable/ClickHouse/ClickHouse.cpp | 
101 +++++++++++---- .../ExternalTable/ClickHouse/ClickHouse.h | 8 +- .../ClickHouse/ClickHouseSink.cpp | 23 ++++ .../ExternalTable/ClickHouse/ClickHouseSink.h | 29 +++++ .../ExternalTable/ExternalTableImpl.h | 2 + .../ExternalTable/ExternalTableSettings.h | 2 + .../ExternalTable/StorageExternalTable.cpp | 35 +++--- .../ExternalTable/StorageExternalTable.h | 9 +- 41 files changed, 562 insertions(+), 84 deletions(-) rename src/Client/{Client.cpp => LibClient.cpp} (74%) rename src/Client/{Client.h => LibClient.h} (78%) create mode 100644 src/DataTypes/ClickHouseDataTypeTranslator.cpp create mode 100644 src/DataTypes/ClickHouseDataTypeTranslator.h create mode 100644 src/DataTypes/IDataTypeTranslator.h create mode 100644 src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp create mode 100644 src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index c7d5d441203..15c5b6a2331 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -985,6 +985,10 @@ void Connection::initBlockInput() } block_in = std::make_unique(*maybe_compressed_in, server_revision); + /// proton: starts + if (data_type_translator) + block_in->setDataTypeTranslator(data_type_translator); + /// proton: ends } } @@ -995,6 +999,10 @@ void Connection::initBlockLogsInput() { /// Have to return superset of SystemLogsQueue::getSampleBlock() columns block_logs_in = std::make_unique(*in, server_revision); + /// proton: starts + if (data_type_translator) + block_logs_in->setDataTypeTranslator(data_type_translator); + /// proton: ends } } @@ -1004,6 +1012,10 @@ void Connection::initBlockProfileEventsInput() if (!block_profile_events_in) { block_profile_events_in = std::make_unique(*in, server_revision); + /// proton: starts + if (data_type_translator) + block_profile_events_in->setDataTypeTranslator(data_type_translator); + /// proton: ends } } @@ -1085,4 +1097,17 @@ ServerConnectionPtr Connection::createConnection(const 
ConnectionParameters & pa parameters.security); } +/// proton: starts +void Connection::setDataTypeTranslator(IDataTypeTranslator * translator) +{ + data_type_translator = translator; + if (block_in) + block_in->setDataTypeTranslator(data_type_translator); + if (block_logs_in) + block_logs_in->setDataTypeTranslator(data_type_translator); + if (block_profile_events_in) + block_profile_events_in->setDataTypeTranslator(data_type_translator); +} +/// proton: ends + } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 243420b0593..0f0a3d13080 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -35,6 +35,10 @@ using Connections = std::vector; class NativeReader; class NativeWriter; +/// proton: starts +class IDataTypeTranslator; +/// proton: ends + /** Connection with database server, to use by client. * How to use - see Core/Protocol.h @@ -155,6 +159,10 @@ class Connection : public IServerConnection if (in) in->setAsyncCallback(std::move(async_callback)); } + + /// proton: starts + void setDataTypeTranslator(IDataTypeTranslator * translator); + /// proton: ends private: String host; UInt16 port; @@ -273,6 +281,10 @@ class Connection : public IServerConnection void initBlockProfileEventsInput(); [[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const; + + /// proton: starts + IDataTypeTranslator * data_type_translator = nullptr; + /// proton: ends }; class AsyncCallbackSetter diff --git a/src/Client/Client.cpp b/src/Client/LibClient.cpp similarity index 74% rename from src/Client/Client.cpp rename to src/Client/LibClient.cpp index 0d9929809f0..0e33bf5ad6a 100644 --- a/src/Client/Client.cpp +++ b/src/Client/LibClient.cpp @@ -1,4 +1,4 @@ -#include +#include #include "Processors/Chunk.h" namespace DB @@ -11,14 +11,14 @@ extern const int TIMEOUT_EXCEEDED; extern const int UNKNOWN_PACKET_FROM_SERVER; } -Client::Client(IConnectionPool::Entry connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, 
Poco::Logger * logger_) +LibClient::LibClient(ConnectionPtr connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_) : connection(connection_) , timeouts(timeouts_) , context(context_) , logger(logger_) {} -void Client::executeQuery(String query, const Callbacks & callbacks) +void LibClient::executeQuery(String query, const Callbacks & callbacks) { size_t processed_rows {0}; int retries_left = 10; @@ -54,7 +54,7 @@ void Client::executeQuery(String query, const Callbacks & callbacks) /// Receives and processes packets coming from server. /// Also checks if query execution should be cancelled. -void Client::receiveResult(const Callbacks & callbacks) +void LibClient::receiveResult(const Callbacks & callbacks) { const auto receive_timeout = timeouts.receive_timeout; constexpr size_t default_poll_interval = 1000000; /// in microseconds @@ -98,7 +98,7 @@ void Client::receiveResult(const Callbacks & callbacks) LOG_INFO(logger, "Query was cancelled."); } -void Client::cancelQuery() +void LibClient::cancelQuery() { connection->sendCancel(); cancelled = true; @@ -107,7 +107,7 @@ void Client::cancelQuery() /// Receive a part of the result, or progress info or an exception and process it. /// Returns true if one should continue receiving packets. /// Output of result is suppressed if query was cancelled. 
-bool Client::receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks) +bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks) { Packet packet = connection->receivePacket(); @@ -124,37 +124,45 @@ bool Client::receiveAndProcessPacket(bool cancelled_, const Callbacks & callback return true; case Protocol::Server::Progress: - callbacks.on_progress(packet.progress); + if (callbacks.on_progress) + callbacks.on_progress(packet.progress); return true; case Protocol::Server::ProfileInfo: - callbacks.on_profile_info(packet.profile_info); + if (callbacks.on_profile_info) + callbacks.on_profile_info(packet.profile_info); return true; case Protocol::Server::Totals: if (!cancelled_) - callbacks.on_totals(packet.block); + if (callbacks.on_totals) + callbacks.on_totals(packet.block); return true; case Protocol::Server::Extremes: if (!cancelled_) - callbacks.on_extremes(packet.block); + if (callbacks.on_extremes) + callbacks.on_extremes(packet.block); return true; case Protocol::Server::Exception: - callbacks.on_receive_exception_from_server(std::move(packet.exception)); + if (callbacks.on_receive_exception_from_server) + callbacks.on_receive_exception_from_server(std::move(packet.exception)); return false; case Protocol::Server::Log: - callbacks.on_log_data(packet.block); + if (callbacks.on_log_data) + callbacks.on_log_data(packet.block); return true; case Protocol::Server::EndOfStream: - callbacks.on_end_of_stream(); + if (callbacks.on_end_of_stream) + callbacks.on_end_of_stream(); return false; case Protocol::Server::ProfileEvents: - callbacks.on_profile_events(packet.block); + if (callbacks.on_profile_events) + callbacks.on_profile_events(packet.block); return true; default: @@ -163,44 +171,44 @@ bool Client::receiveAndProcessPacket(bool cancelled_, const Callbacks & callback } } -// void Client::onProgress(const Progress & value) +// void LibClient::onProgress(const Progress & value) // { // LOG_INFO(logger, "onProgress called with 
read_rows = {}", value.read_rows); // } // -// void Client::onData(Block & block) +// void LibClient::onData(Block & block) // { // /// TBD // } // -// void Client::onLogData(Block & block) { +// void LibClient::onLogData(Block & block) { // LOG_INFO(logger, "onLogData called with columns = {}, rows = {}", block.columns(), block.rows()); // } // -// void Client::onTotals(Block & block) +// void LibClient::onTotals(Block & block) // { // LOG_INFO(logger, "onTotals called with columns = {}, rows = {}", block.columns(), block.rows()); // } // -// void Client::onExtremes(Block & block) +// void LibClient::onExtremes(Block & block) // { // LOG_INFO(logger, "onExtremes called with columns = {}, rows = {}", block.columns(), block.rows()); // } // -// void Client::onReceiveExceptionFromServer(std::unique_ptr && e) +// void LibClient::onReceiveExceptionFromServer(std::unique_ptr && e) // { // LOG_INFO(logger, "received server exception: {}", e->what()); // } // -// void Client::onProfileInfo(const ProfileInfo & profile_info) +// void LibClient::onProfileInfo(const ProfileInfo & profile_info) // { // LOG_INFO(logger, "received ProfileInfo: rows={}", profile_info.rows); // } -// void Client::onEndOfStream() +// void LibClient::onEndOfStream() // { // LOG_INFO(logger, "received EndOfStream"); // } -// void Client::onProfileEvents(Block & block) +// void LibClient::onProfileEvents(Block & block) // { // LOG_INFO(logger, "received ProfileEvents rows = {}", block.rows()); // } diff --git a/src/Client/Client.h b/src/Client/LibClient.h similarity index 78% rename from src/Client/Client.h rename to src/Client/LibClient.h index 4d5802e2ba0..4bd82380456 100644 --- a/src/Client/Client.h +++ b/src/Client/LibClient.h @@ -20,20 +20,20 @@ struct Callbacks std::function on_profile_events; }; -/// Client is for using as a library client without all the complexities for handling terminal stuff like ClientBase does. 
-class Client final +/// LibClient is for using as a library client without all the complexities for handling terminal stuff like ClientBase does. +class LibClient final { public: - Client(IConnectionPool::Entry connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_); + LibClient(ConnectionPtr connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_); void executeQuery(String query, const Callbacks & callbacks); + void receiveResult(const Callbacks & callbacks); void cancelQuery(); private: - void receiveResult(const Callbacks & callbacks); bool receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks); - IConnectionPool::Entry connection; + ConnectionPtr connection; ConnectionTimeouts timeouts; std::atomic_bool cancelled {false}; diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index 35bb9277f80..bf9f63ae1bf 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -35,6 +35,16 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> return name; } + /// proton: starts + String getClickHouseAliasToOrName(const String & name) const + { + if (clickhouse_names.contains(name)) + return clickhouse_names.at(name); + else + return name; + } + /// proton: ends + std::unordered_map case_insensitive_name_mapping; public: @@ -81,6 +91,38 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> throw Exception(factory_name + ": alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); } + /// proton: starts + /// Register the name used by ClickHouse for value + /// real_name have to be already registered. 
+ void registerClickHouseAlias(const String & alias_name, const String & alias_or_real_name) + { + const auto & creator_map = getMap(); + const auto & case_insensitive_creator_map = getCaseInsensitiveMap(); + const String factory_name = getFactoryName(); + + String real_dict_name; + String real_name = alias_or_real_name; + if (auto it = aliases.find(real_name); it != aliases.end()) + real_name = it->second; + if (creator_map.count(real_name)) + real_dict_name = real_name; + else if (auto real_name_lowercase = Poco::toLower(real_name); case_insensitive_creator_map.count(real_name_lowercase)) + real_dict_name = real_name_lowercase; + else + throw Exception(factory_name + ": can't create ClickHouse alias '" + alias_name + "', the real name '" + alias_or_real_name + "' is not registered", + ErrorCodes::LOGICAL_ERROR); + + String alias_name_lowercase = Poco::toLower(alias_name); + + if (creator_map.count(alias_name) || case_insensitive_creator_map.count(alias_name_lowercase)) + throw Exception( + factory_name + ": the ClickHouse alias name '" + alias_name + "' is already registered as real name", ErrorCodes::LOGICAL_ERROR); + + if (!clickhouse_names.emplace(alias_name, real_dict_name).second) + throw Exception(factory_name + ": ClickHouse alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); + } + /// proton: ends + std::vector getAllRegisteredNames() const override { std::vector result; @@ -144,6 +186,11 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> /// Case insensitive aliases AliasMap case_insensitive_aliases; + + /// proton: starts + /// ClickHouse names map to data_types from previous two maps + AliasMap clickhouse_names; + /// proton: ends }; } diff --git a/src/DataTypes/ClickHouseDataTypeTranslator.cpp b/src/DataTypes/ClickHouseDataTypeTranslator.cpp new file mode 100644 index 00000000000..ea2d11d8a3a --- /dev/null +++ b/src/DataTypes/ClickHouseDataTypeTranslator.cpp @@ -0,0 +1,118 @@ +#include + +namespace DB +{ + 
+ClickHouseDataTypeTranslator & ClickHouseDataTypeTranslator::instance() +{ + static ClickHouseDataTypeTranslator ret; + return ret; +} + +ClickHouseDataTypeTranslator::ClickHouseDataTypeTranslator() +{ + /// reference: DataTypeFactory + type_dict = { + {"UInt8", "uint8"}, + {"UInt16", "uint16"}, + {"UInt32", "uint32"}, + {"UInt64", "uint64"}, + + {"Int8", "int8"}, + {"Int16", "int16"}, + {"Int32", "int32"}, + {"Int64", "int64"}, + + {"Float32", "float32"}, + {"Float64", "float64"}, + + {"UInt128", "uint128"}, + {"UInt256", "uint256"}, + + {"Int128", "int128"}, + {"Int256", "int256"}, + + {"BYTE", "byte"}, + {"SMALLINT", "smallint"}, + {"INT", "int"}, + {"UINT", "uint"}, + {"INTEGER", "integer"}, + {"BIGINT", "bigint"}, + {"FLOAT", "float"}, + {"DOUBLE", "double"}, + + {"Decimal32", "decimal32"}, + {"Decimal64", "decimal64"}, + {"Decimal128", "decimal128"}, + {"Decimal256", "decimal256"}, + {"Decimal", "decimal"}, + + {"Date", "date"}, + {"Date32", "date32"}, + + {"DateTime", "datetime"}, + {"DateTime32", "datetime32"}, + {"DateTime64", "datetime64"}, + + {"String", "string"}, + {"VARCHAR", "VARCHAR"}, + + {"FixedString", "fixed_string"}, + + {"Enum8", "enum8"}, + {"Enum16", "enum16"}, + {"Enum", "enum"}, + + {"Array", "array"}, + + {"Tuple", "tuple"}, + + {"Nullable", "nullable"}, + + {"Nothing", "nothing"}, + + {"UUID", "uuid"}, + + {"IPv4", "ipv4"}, + {"INET", "inet"}, + {"IPv6", "ipv6"}, + {"INET6", "inet6"}, + + {"AggregateFunction", "aggregate_function"}, + + {"Nested", "nested"}, + + {"IntervalNanosecond", "interval_nanosecond"}, + {"IntervalMicrosecond", "interval_microsecond"}, + {"IntervalMillisecond", "interval_millisecond"}, + {"IntervalSecond", "interval_second"}, + {"IntervalMinute", "interval_minute"}, + {"IntervalHour", "interval_hour"}, + {"IntervalDay", "interval_day"}, + {"IntervalWeek", "interval_week"}, + {"IntervalMonth", "interval_month"}, + {"IntervalQuarter", "interval_quarter"}, + {"IntervalYear", "interval_year"}, + + {"LowCardinality", 
"low_cardinality"}, + + {"Bool", "bool"}, + + {"SimpleAggregateFunction", "simple_aggregate_function"}, + + {"Map", "map"}, + + {"JSON", "json"}, + }; +} + +std::string ClickHouseDataTypeTranslator::translate(const std::string & type_name) +{ + auto it = type_dict.find(type_name); + if (it == type_dict.end()) + return type_name; + + return it->second; +} + +} diff --git a/src/DataTypes/ClickHouseDataTypeTranslator.h b/src/DataTypes/ClickHouseDataTypeTranslator.h new file mode 100644 index 00000000000..35d8c1872dd --- /dev/null +++ b/src/DataTypes/ClickHouseDataTypeTranslator.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +class ClickHouseDataTypeTranslator final : public IDataTypeTranslator +{ +public: + static ClickHouseDataTypeTranslator & instance(); + + ~ClickHouseDataTypeTranslator() override = default; + + std::string translate(const std::string & type_name) override; + +private: + ClickHouseDataTypeTranslator(); + + std::unordered_map type_dict; +}; + +} diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index c8a09dff73b..5c0c8e436e3 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -259,6 +259,8 @@ void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::opti void registerDataTypeAggregateFunction(DataTypeFactory & factory) { factory.registerDataType("aggregate_function", create); + + factory.registerClickHouseAlias("AggregateFunction", "aggregate_function"); } } diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 3fc290f8732..cf9ada743e4 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -71,6 +71,8 @@ static DataTypePtr create(const ASTPtr & arguments) void registerDataTypeArray(DataTypeFactory & factory) { factory.registerDataType("array", create); + + factory.registerClickHouseAlias("Array", "array"); } } diff --git 
a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 7cc4866eb38..1bd3268cabe 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -145,6 +145,8 @@ static std::pair create(const ASTPtr & argum void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) { factory.registerDataTypeCustom("simple_aggregate_function", create); + + factory.registerClickHouseAlias("SimpleAggregateFunction", "simple_aggregate_function"); } } diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp index d8daf8b97ef..331767e1b16 100644 --- a/src/DataTypes/DataTypeDate.cpp +++ b/src/DataTypes/DataTypeDate.cpp @@ -18,6 +18,8 @@ SerializationPtr DataTypeDate::doGetDefaultSerialization() const void registerDataTypeDate(DataTypeFactory & factory) { factory.registerSimpleDataType("date", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); + + // factory.registerClickHouseAlias("Date", "date"); } } diff --git a/src/DataTypes/DataTypeDomainBool.cpp b/src/DataTypes/DataTypeDomainBool.cpp index db8e7bd1066..89a4c428e54 100644 --- a/src/DataTypes/DataTypeDomainBool.cpp +++ b/src/DataTypes/DataTypeDomainBool.cpp @@ -15,6 +15,8 @@ void registerDataTypeDomainBool(DataTypeFactory & factory) }); factory.registerAlias("boolean", "bool"); + + factory.registerClickHouseAlias("Bool", "bool"); } } diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 07b9cf88c46..d651adb3b5e 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -315,6 +315,10 @@ void registerDataTypeEnum(DataTypeFactory & factory) /// MySQL /// factory.registerAlias("ENUM", "enum", DataTypeFactory::CaseInsensitive); + + factory.registerClickHouseAlias("Enum8", "enum8"); + factory.registerClickHouseAlias("Enum16", "enum16"); + factory.registerClickHouseAlias("Enum", 
"enum"); } } diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index afa5bd26f2d..8da07977328 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -34,7 +34,7 @@ DataTypePtr DataTypeFactory::get(TypeIndex type) const } /// proton: ends. -DataTypePtr DataTypeFactory::get(const String & full_name) const +DataTypePtr DataTypeFactory::get(const String & full_name/* proton: starts*/, bool compatible_with_clickhouse/* proton: ends*/) const { /// Data type parser can be invoked from coroutines with small stack. /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers) @@ -49,21 +49,21 @@ DataTypePtr DataTypeFactory::get(const String & full_name) const ParserDataType parser; ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth); - return get(ast); + return get(ast, compatible_with_clickhouse); } -DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const +DataTypePtr DataTypeFactory::get(const ASTPtr & ast/* proton: starts */, bool compatible_with_clickhouse/* proton: ends*/) const { if (const auto * func = ast->as()) { if (func->parameters) throw Exception("Data type cannot have multiple parenthesized parameters.", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE); - return get(func->name, func->arguments); + return get(func->name, func->arguments, compatible_with_clickhouse); } if (const auto * ident = ast->as()) { - return get(ident->name(), {}); + return get(ident->name(), {}, compatible_with_clickhouse); } if (const auto * lit = ast->as()) @@ -75,9 +75,13 @@ DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const throw Exception("Unexpected AST element for data type.", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } -DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr & parameters) const +DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr 
& parameters/* proton: starts */, bool compatible_with_clickhouse/* proton: ends */) const { - String family_name = getAliasToOrName(family_name_param); + String family_name; + if (compatible_with_clickhouse) + family_name = getAliasToOrName(getClickHouseAliasToOrName(family_name_param)); + else + family_name = getAliasToOrName(family_name_param); if (endsWith(family_name, "_with_dictionary")) { diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 6301548548d..eb8b09e0c1e 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -35,9 +35,9 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli DataTypePtr get(TypeIndex type) const; /// proton: ends. - DataTypePtr get(const String & full_name) const; - DataTypePtr get(const String & family_name, const ASTPtr & parameters) const; - DataTypePtr get(const ASTPtr & ast) const; + DataTypePtr get(const String & full_name/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; + DataTypePtr get(const String & family_name, const ASTPtr & parameters/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; + DataTypePtr get(const ASTPtr & ast/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; DataTypePtr getCustom(DataTypeCustomDescPtr customization) const; /// Register a type family by its name. 
@@ -67,6 +67,7 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli const DataTypesDictionary & getCaseInsensitiveMap() const override { return case_insensitive_data_types; } String getFactoryName() const override { return "DataTypeFactory"; } + }; void registerDataTypeNumbers(DataTypeFactory & factory); diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 6b87b530261..53cca18e8e2 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -60,6 +60,8 @@ static DataTypePtr create(const ASTPtr & arguments) void registerDataTypeFixedString(DataTypeFactory & factory) { factory.registerDataType("fixed_string", create); + + factory.registerClickHouseAlias("FixedString", "fixed_string"); } } diff --git a/src/DataTypes/DataTypeIPv4andIPv6.cpp b/src/DataTypes/DataTypeIPv4andIPv6.cpp index 6d091182cd7..39e87320425 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.cpp +++ b/src/DataTypes/DataTypeIPv4andIPv6.cpp @@ -12,6 +12,9 @@ void registerDataTypeIPv4andIPv6(DataTypeFactory & factory) factory.registerAlias("inet4", "ipv4", DataTypeFactory::CaseInsensitive); factory.registerSimpleDataType("ipv6", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); factory.registerAlias("inet6", "ipv6", DataTypeFactory::CaseInsensitive); + + factory.registerClickHouseAlias("INET4", "inet4"); + factory.registerClickHouseAlias("INET6", "inet6"); } } diff --git a/src/DataTypes/DataTypeInterval.cpp b/src/DataTypes/DataTypeInterval.cpp index f82d5066fb8..f288567eaaf 100644 --- a/src/DataTypes/DataTypeInterval.cpp +++ b/src/DataTypes/DataTypeInterval.cpp @@ -24,6 +24,18 @@ void registerDataTypeInterval(DataTypeFactory & factory) factory.registerSimpleDataType("interval_month", [] { return DataTypePtr(std::make_shared(IntervalKind::Month)); }); factory.registerSimpleDataType("interval_quarter", [] { return DataTypePtr(std::make_shared(IntervalKind::Quarter)); 
}); factory.registerSimpleDataType("interval_year", [] { return DataTypePtr(std::make_shared(IntervalKind::Year)); }); + + factory.registerClickHouseAlias("IntervalNanosecond", "interval_nanosecond"); + factory.registerClickHouseAlias("IntervalMicrosecond", "interval_microsecond"); + factory.registerClickHouseAlias("IntervalMillisecond", "interval_millisecond"); + factory.registerClickHouseAlias("IntervalSecond", "interval_second"); + factory.registerClickHouseAlias("IntervalMinute", "interval_minute"); + factory.registerClickHouseAlias("IntervalHour", "interval_hour"); + factory.registerClickHouseAlias("IntervalDay", "interval_day"); + factory.registerClickHouseAlias("IntervalWeek", "interval_week"); + factory.registerClickHouseAlias("IntervalMonth", "interval_month"); + factory.registerClickHouseAlias("IntervalQuarter", "interval_quarter"); + factory.registerClickHouseAlias("IntervalYear", "interval_year"); } } diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index bae6c1f70cc..e134c99d777 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -162,6 +162,8 @@ static DataTypePtr create(const ASTPtr & arguments) void registerDataTypeLowCardinality(DataTypeFactory & factory) { factory.registerDataType("low_cardinality", create); + + factory.registerClickHouseAlias("LowCardinality", "low_cardinality"); } diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 6bca147022d..d425ec1e54f 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -145,5 +145,7 @@ static DataTypePtr create(const ASTPtr & arguments) void registerDataTypeMap(DataTypeFactory & factory) { factory.registerDataType("map", create); + + factory.registerClickHouseAlias("Map", "map"); } } diff --git a/src/DataTypes/DataTypeNested.cpp b/src/DataTypes/DataTypeNested.cpp index f0ef3c638d1..04626be209b 100644 --- a/src/DataTypes/DataTypeNested.cpp +++ 
b/src/DataTypes/DataTypeNested.cpp @@ -61,7 +61,9 @@ static std::pair create(const ASTPtr & argum void registerDataTypeNested(DataTypeFactory & factory) { - return factory.registerDataTypeCustom("nested", create); + factory.registerDataTypeCustom("nested", create); + + factory.registerClickHouseAlias("Nested", "nested"); } DataTypePtr createNested(const DataTypes & types, const Names & names) diff --git a/src/DataTypes/DataTypeNothing.cpp b/src/DataTypes/DataTypeNothing.cpp index 09019b2c83b..afc36b1f529 100644 --- a/src/DataTypes/DataTypeNothing.cpp +++ b/src/DataTypes/DataTypeNothing.cpp @@ -26,6 +26,8 @@ SerializationPtr DataTypeNothing::doGetDefaultSerialization() const void registerDataTypeNothing(DataTypeFactory & factory) { factory.registerSimpleDataType("nothing", [] { return DataTypePtr(std::make_shared()); }); + + factory.registerClickHouseAlias("Nothing", "nothing"); } } diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index d2e192e2b7f..c8dbe17070a 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -75,6 +75,8 @@ static DataTypePtr create(const ASTPtr & arguments) void registerDataTypeNullable(DataTypeFactory & factory) { factory.registerDataType("nullable", create); + + factory.registerClickHouseAlias("Nullable", "nullable"); } diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index caea32299e5..3c914e6a545 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -94,5 +94,7 @@ void registerDataTypeString(DataTypeFactory & factory) /// factory.registerAlias("VARBINARY", "string", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("GEOMETRY", "string", DataTypeFactory::CaseInsensitive); //mysql + factory.registerClickHouseAlias("String", "string"); + // factory.registerClickHouseAlias("VARCHAR", "VARCHAR"); } } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 
5bcb7a3edba..926f472021c 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -373,6 +373,8 @@ static DataTypePtr create(const ASTPtr & arguments) void registerDataTypeTuple(DataTypeFactory & factory) { factory.registerDataType("tuple", create); + + factory.registerClickHouseAlias("Tuple", "tuple"); } } diff --git a/src/DataTypes/DataTypeUUID.cpp b/src/DataTypes/DataTypeUUID.cpp index e4044b3afd5..9648f1f6bb8 100644 --- a/src/DataTypes/DataTypeUUID.cpp +++ b/src/DataTypes/DataTypeUUID.cpp @@ -29,6 +29,8 @@ MutableColumnPtr DataTypeUUID::createColumn() const void registerDataTypeUUID(DataTypeFactory & factory) { factory.registerSimpleDataType("uuid", [] { return DataTypePtr(std::make_shared()); }); + + factory.registerClickHouseAlias("Uuid", "uuid"); } } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 1f7313fa946..90de5d0f14b 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -113,6 +113,13 @@ void registerDataTypeDecimal(DataTypeFactory & factory) /// factory.registerAlias("DEC", "decimal", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("NUMERIC", "decimal", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("FIXED", "decimal", DataTypeFactory::CaseInsensitive); + + // factory.registerClickHouseAlias("Decimal32", "decimal32"); + // factory.registerClickHouseAlias("Decimal64", "decimal64"); + // factory.registerClickHouseAlias("Decimal128", "decimal128"); + // factory.registerClickHouseAlias("Decimal256", "decimal256"); + + // factory.registerClickHouseAlias("Decimal", "decimal"); } /// Explicit template instantiations. 
diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 96eaadc9efd..fa9a88002b4 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -91,6 +91,34 @@ void registerDataTypeNumbers(DataTypeFactory & factory) /// factory.registerAlias("SET", "uint64", DataTypeFactory::CaseInsensitive); /// MySQL /// factory.registerAlias("YEAR", "uint16", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("TIME", "int64", DataTypeFactory::CaseInsensitive); + + factory.registerClickHouseAlias("UInt8", "uint8"); + factory.registerClickHouseAlias("UInt16", "uint16"); + factory.registerClickHouseAlias("UInt32", "uint32"); + factory.registerClickHouseAlias("UInt64", "uint64"); + + factory.registerClickHouseAlias("Int8", "int8"); + factory.registerClickHouseAlias("Int16", "int16"); + factory.registerClickHouseAlias("Int32", "int32"); + factory.registerClickHouseAlias("Int64", "int64"); + + factory.registerClickHouseAlias("Float32", "float32"); + factory.registerClickHouseAlias("Float64", "float64"); + + factory.registerClickHouseAlias("UInt128", "uint128"); + factory.registerClickHouseAlias("UInt256", "uint256"); + + factory.registerClickHouseAlias("Int128", "int128"); + factory.registerClickHouseAlias("Int256", "int256"); + + factory.registerClickHouseAlias("BYTE", "byte"); + factory.registerClickHouseAlias("SMALLINT", "smallint"); + factory.registerClickHouseAlias("INT", "int"); + factory.registerClickHouseAlias("UINT", "uint"); + factory.registerClickHouseAlias("INTEGER", "integer"); + factory.registerClickHouseAlias("BIGINT", "bigint"); + factory.registerClickHouseAlias("FLOAT", "float"); + factory.registerClickHouseAlias("DOUBLE", "double"); } } diff --git a/src/DataTypes/IDataTypeTranslator.h b/src/DataTypes/IDataTypeTranslator.h new file mode 100644 index 00000000000..0c897cbc7b0 --- /dev/null +++ b/src/DataTypes/IDataTypeTranslator.h @@ -0,0 +1,14 @@ +#pragma once + +namespace DB +{ + +class 
IDataTypeTranslator +{ +public: + virtual ~IDataTypeTranslator() = default; + + virtual std::string translate(const std::string & type_name) = 0; +}; + +} diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index bdc936daf85..4dee1581a49 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -111,6 +111,10 @@ void registerDataTypeDateTime(DataTypeFactory & factory) factory.registerDataType("datetime64", create64, DataTypeFactory::CaseInsensitive); /// factory.registerAlias("TIMESTAMP", "datetime", DataTypeFactory::CaseInsensitive); + + // factory.registerClickHouseAlias("Datetime", "datetime"); + // factory.registerClickHouseAlias("Datetime32", "datetime32"); + // factory.registerClickHouseAlias("Datetime64", "datetime64"); } } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index bdc144a929e..c92e6a80c1a 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -140,7 +140,7 @@ Block NativeReader::read() /// Type String type_name; readStringBinary(type_name, istr); - column.type = data_type_factory.get(type_name); + column.type = data_type_factory.get(type_name/* proton: starts */, !!data_type_translator/* proton: ends */); setVersionToAggregateFunctions(column.type, true, server_revision); @@ -230,4 +230,10 @@ void NativeReader::updateAvgValueSizeHints(const Block & block) } } +/// proton: starts +void NativeReader::setDataTypeTranslator(IDataTypeTranslator * translator) +{ + data_type_translator = translator; +} +/// proton: ends } diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 1f9eb8b9764..987c9987d15 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -5,6 +5,10 @@ #include #include +/// proton: starts +#include +/// proton: ends + namespace DB { @@ -39,6 +43,10 @@ class NativeReader Block read(); + /// proton: starts + void 
setDataTypeTranslator(IDataTypeTranslator * translator); + /// proton: ends + private: ReadBuffer & istr; Block header; @@ -55,6 +63,10 @@ class NativeReader PODArray avg_value_size_hints; void updateAvgValueSizeHints(const Block & block); + + /// proton: starts + IDataTypeTranslator * data_type_translator {nullptr}; + /// proton: ends }; } diff --git a/src/Storages/ExternalTable/CMakeLists.txt b/src/Storages/ExternalTable/CMakeLists.txt index bd39e1ba864..4558bbb4e93 100644 --- a/src/Storages/ExternalTable/CMakeLists.txt +++ b/src/Storages/ExternalTable/CMakeLists.txt @@ -1,6 +1,7 @@ include("${proton_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(external_table .) +add_headers_and_sources(external_table ClickHouse) add_library(external_table ${external_table_headers} ${external_table_sources}) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 31dd5ae59df..a6c3d8bc537 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -1,5 +1,9 @@ +#include +#include +#include #include -#include "Client/Client.h" +#include +#include namespace DB { @@ -23,36 +27,25 @@ ClickHouse::ClickHouse(ExternalTableSettingsPtr settings, ContextPtr & context_) if (!port) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid port in ClickHouse address"); - connection_pool = ConnectionPoolFactory::instance().get( - 100 /*max_connections*/, - host, port, - "default" /*default_database*/, - "default" /*user*/, - "" /*password*/, - "" /*quota_key*/, - "" /*cluster*/, - "" /*cluster_secret*/, - "Timeplus Proton" /*client_name*/, - Protocol::Compression::Enable, - Protocol::Secure::Disable, - 0 /*priority*/); - - timeouts = ConnectionTimeouts( + connection_params.host = host; + connection_params.port = port; + connection_params.user = settings->user.value; + connection_params.password = settings->password.value; + 
connection_params.default_database = "default"; + connection_params.timeouts = { 10 * 60 * 1'000'000 /*connection_timeout_*/, 10 * 60 * 1'000'000 /*send_timeout_*/, 10 * 60 * 1'000'000 /*receive_timeout_*/ - ); + }; } void ClickHouse::startup() { - NameToNameMap map; - auto conn = connection_pool->get(timeouts); - Client client {conn, timeouts, context, logger}; +#ifdef GO_ON_PRODUCTIOn client.executeQuery("DESCRIBE TABLE " + table, { - .on_data = [log = logger](Block & block) + .on_data = [this](Block & block) { - LOG_INFO(log, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); + LOG_INFO(logger, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); auto cols = block.getColumns(); for (size_t i = 0; i < block.rows(); ++i) { @@ -65,6 +58,70 @@ void ClickHouse::startup() } } }); +#endif + LOG_INFO(logger, "startup"); +} + +SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) +{ + return std::make_shared(metadata_snapshot->getSampleBlock(), logger); +} + +ColumnsDescription ClickHouse::getTableStructure() +{ + auto conn = std::make_shared( + connection_params.host, + connection_params.port, + connection_params.default_database, + connection_params.user, + connection_params.password, + connection_params.quota_key, + "", /*cluster*/ + "", /*cluster_secret*/ + "TimeplusProton", + connection_params.compression, + connection_params.security); + + conn->setDataTypeTranslator(&ClickHouseDataTypeTranslator::instance()); + + LOG_INFO(logger, "executing SQL: DESCRIBE TABLE {}", table); + conn->sendQuery(connection_params.timeouts, "DESCRIBE TABLE " + table, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); + LOG_INFO(logger, "receiving data"); + + ColumnsDescription ret {}; + + LibClient client {std::move(conn), connection_params.timeouts, context, logger}; + client.receiveResult({ + .on_data = [this, &ret](Block & block) + { + 
LOG_INFO(logger, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); + if (!block.rows()) + return; + + const auto & cols = block.getColumns(); + const auto & factory = DataTypeFactory::instance(); + for (size_t i = 0; i < block.rows(); ++i) + { + ColumnDescription col_desc {}; + { + const auto & col = block.getByName("name"); + col_desc.name = col.column->getDataAt(i).toString(); + } + { + const auto & col = block.getByName("type"); + col_desc.type = factory.get(col.column->getDataAt(i).toString(), true); + } + { + const auto & col = block.getByName("comment"); + col_desc.comment = col.column->getDataAt(i).toString(); + } + LOG_INFO(logger, "row {}: col_name = {}, col_type = {}", i, col_desc.name, col_desc.type); + ret.add(col_desc, String(), false, false); + } + } + }); + + return ret; } } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h index 691c7c54c91..287b62f01a9 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -18,12 +18,12 @@ class ClickHouse final : public IExternalTable void startup() override; void shutdown() override {} + ColumnsDescription getTableStructure() override; + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; private: - ConnectionPoolPtr connection_pool; - ConnectionTimeouts timeouts; - + ConnectionParameters connection_params; String table; ContextPtr & context; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp new file mode 100644 index 00000000000..b7ea1776c6c --- /dev/null +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -0,0 +1,23 @@ +#include +#include + +namespace DB +{ + +namespace ExternalTable +{ + 
+ClickHouseSink::ClickHouseSink(const Block & header, Poco::Logger * logger_) + : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) + , logger(logger_) +{ +} + +void ClickHouseSink::consume(Chunk chunk) +{ + LOG_INFO(logger, "consuming from chunk contains {} rows", chunk.rows()); +} + +} + +} diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h new file mode 100644 index 00000000000..99515887bb1 --- /dev/null +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ExternalTable +{ + +class ClickHouseSink final : public SinkToStorage +{ +public: + ClickHouseSink(const Block & header, const ConnectionParameters & params, const ConnectionTimeouts & timeouts, Poco::Logger * logger_); + + String getName() const override { return "ClickHouseSink"; } + + void consume(Chunk chunk) override; + +private: + std::unique_ptr conn; + + Poco::Logger * logger; +}; + +} + +} diff --git a/src/Storages/ExternalTable/ExternalTableImpl.h b/src/Storages/ExternalTable/ExternalTableImpl.h index f556c83cbf8..6141bc5e37c 100644 --- a/src/Storages/ExternalTable/ExternalTableImpl.h +++ b/src/Storages/ExternalTable/ExternalTableImpl.h @@ -14,6 +14,8 @@ class IExternalTable virtual void startup() = 0; virtual void shutdown() = 0; + virtual ColumnsDescription getTableStructure() = 0; + virtual SinkToStoragePtr write(const ASTPtr & /* query */, const StorageMetadataPtr & /* metadata_snapshot */, ContextPtr /* context */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Ingesting data to this type of external stream is not supported"); diff --git a/src/Storages/ExternalTable/ExternalTableSettings.h b/src/Storages/ExternalTable/ExternalTableSettings.h index ca983f9547d..5f76c9179b8 100644 --- a/src/Storages/ExternalTable/ExternalTableSettings.h +++ b/src/Storages/ExternalTable/ExternalTableSettings.h @@ -11,6 +11,8 @@ class 
ASTStorage; M(String, type, "", "External table type", 0) \ /* ClickHouse settings */ \ M(String, address, "", "The address of the ClickHouse server to connect", 0) \ + M(String, user, "", "The user to be used to connect to the ClickHouse server", 0) \ + M(String, password, "", "The password to be used to connect to the ClickHouse server", 0) \ M(String, table, "", "The ClickHouse table to which the external table is mapped", 0) DECLARE_SETTINGS_TRAITS(ExternalTableSettingsTraits, LIST_OF_EXTERNAL_TABLE_SETTINGS) diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index 5d7ecbdbd8d..a5e0d602924 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -4,18 +4,16 @@ #include #include #include -#include #include "Storages/ExternalTable/ClickHouse/ClickHouse.h" namespace DB { StorageExternalTable::StorageExternalTable( - const StorageID & table_id_, std::unique_ptr settings, - ContextPtr context_) -: IStorage(table_id_) -, WithContext(context_->getGlobalContext()) + const StorageFactory::Arguments & args) +: IStorage(args.table_id) +, WithContext(args.getContext()->getGlobalContext()) { auto type = settings->type.value; if (type == "clickhouse") @@ -25,6 +23,8 @@ StorageExternalTable::StorageExternalTable( } else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown external table type: {}", type); + + setStorageMetadata(args); } SinkToStoragePtr StorageExternalTable::write( @@ -35,19 +35,27 @@ SinkToStoragePtr StorageExternalTable::write( return external_table->write(query, metadata_snapshot, context_); } +void StorageExternalTable::setStorageMetadata(const StorageFactory::Arguments & args) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(external_table->getTableStructure()); + + storage_metadata.setConstraints(args.constraints); + storage_metadata.setComment(args.comment); + 
setInMemoryMetadata(storage_metadata); +} + void registerStorageExternalTable(StorageFactory & factory) { auto creator_fn = [](const StorageFactory::Arguments & args) { - if (args.storage_def->settings) - { - auto settings = std::make_unique(); - settings->loadFromQuery(*args.storage_def); - - return StorageExternalTable::create(args.table_id, std::move(settings), args.getContext()); - } - else + if (!args.storage_def->settings) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External table requires correct settings setup"); + + auto settings = std::make_unique(); + settings->loadFromQuery(*args.storage_def); + + return StorageExternalTable::create(std::move(settings), args); }; factory.registerStorage( @@ -59,5 +67,4 @@ void registerStorageExternalTable(StorageFactory & factory) }); } - } diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index 87bcecd8dfb..c702c86a30c 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -4,6 +4,8 @@ #include #include #include +#include + #include namespace DB @@ -28,12 +30,11 @@ class StorageExternalTable final : public shared_ptr_helper settings, - ContextPtr context_); + StorageExternalTable(std::unique_ptr settings, const StorageFactory::Arguments & args); private: + void setStorageMetadata(const StorageFactory::Arguments & args); + IExternalTablePtr external_table; /// TBD From 49af98b2948f7a19468cfbb1dea856c0315650bc Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Fri, 26 Jan 2024 21:01:32 -0800 Subject: [PATCH 06/26] ClickHouseSink --- src/Client/Connection.cpp | 21 ++++++----- src/Client/Connection.h | 8 ++--- src/Formats/NativeReader.h | 8 ++--- .../ExternalTable/ClickHouse/ClickHouse.cpp | 4 +-- .../ClickHouse/ClickHouseSink.cpp | 35 ++++++++++++++++--- .../ExternalTable/ClickHouse/ClickHouseSink.h | 3 +- 6 files changed, 48 insertions(+), 31 deletions(-) diff --git a/src/Client/Connection.cpp 
b/src/Client/Connection.cpp index 15c5b6a2331..d261cf45017 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -986,8 +986,8 @@ void Connection::initBlockInput() block_in = std::make_unique(*maybe_compressed_in, server_revision); /// proton: starts - if (data_type_translator) - block_in->setDataTypeTranslator(data_type_translator); + if (compatible_with_clickhouse) + block_in->setCompatibleWithClickHouse(); /// proton: ends } } @@ -1000,8 +1000,7 @@ void Connection::initBlockLogsInput() /// Have to return superset of SystemLogsQueue::getSampleBlock() columns block_logs_in = std::make_unique(*in, server_revision); /// proton: starts - if (data_type_translator) - block_logs_in->setDataTypeTranslator(data_type_translator); + if (compatible_with_clickhouse) block_logs_in->setCompatibleWithClickHouse(); /// proton: ends } } @@ -1013,8 +1012,8 @@ void Connection::initBlockProfileEventsInput() { block_profile_events_in = std::make_unique(*in, server_revision); /// proton: starts - if (block_profile_events_in) - block_profile_events_in->setDataTypeTranslator(data_type_translator); + if (compatible_with_clickhouse) + block_profile_events_in->setCompatibleWithClickHouse(); /// proton: ends } } @@ -1098,15 +1097,15 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa } /// proton: starts -void Connection::setDataTypeTranslator(IDataTypeTranslator * translator) +void Connection::setCompatibleWithClickHouse() { - data_type_translator = translator; + compatible_with_clickhouse = true; if (block_in) - block_in->setDataTypeTranslator(data_type_translator); + block_in->setCompatibleWithClickHouse(); if (block_logs_in) - block_logs_in->setDataTypeTranslator(data_type_translator); + block_logs_in->setCompatibleWithClickHouse(); if (block_profile_events_in) - block_profile_events_in->setDataTypeTranslator(data_type_translator); + block_profile_events_in->setCompatibleWithClickHouse(); } /// proton: ends diff --git 
a/src/Client/Connection.h b/src/Client/Connection.h index 0f0a3d13080..40f1c004d40 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -35,10 +35,6 @@ using Connections = std::vector; class NativeReader; class NativeWriter; -/// proton: starts -class IDataTypeTranslator; -/// proton: ends - /** Connection with database server, to use by client. * How to use - see Core/Protocol.h @@ -161,7 +157,7 @@ class Connection : public IServerConnection } /// proton: starts - void setDataTypeTranslator(IDataTypeTranslator * translator); + void setCompatibleWithClickHouse(); /// proton: ends private: String host; @@ -283,7 +279,7 @@ class Connection : public IServerConnection [[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const; /// proton: starts - IDataTypeTranslator * data_type_translator = nullptr; + bool compatible_with_clickhouse {false}; /// proton: ends }; diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 987c9987d15..fa1e3240aab 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -5,10 +5,6 @@ #include #include -/// proton: starts -#include -/// proton: ends - namespace DB { @@ -44,7 +40,7 @@ class NativeReader Block read(); /// proton: starts - void setDataTypeTranslator(IDataTypeTranslator * translator); + void setCompatibleWithClickHouse() { compatible_with_clickhouse = true; } /// proton: ends private: @@ -65,7 +61,7 @@ class NativeReader void updateAvgValueSizeHints(const Block & block); /// proton: starts - IDataTypeTranslator * data_type_translator {nullptr}; + bool compatible_with_clickhouse {false}; /// proton: ends }; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index a6c3d8bc537..9b0eebe6eed 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -64,7 +64,7 @@ void ClickHouse::startup() SinkToStoragePtr 
ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) { - return std::make_shared(metadata_snapshot->getSampleBlock(), logger); + return std::make_shared(metadata_snapshot->getSampleBlock(), connection_params, logger); } ColumnsDescription ClickHouse::getTableStructure() @@ -82,7 +82,7 @@ ColumnsDescription ClickHouse::getTableStructure() connection_params.compression, connection_params.security); - conn->setDataTypeTranslator(&ClickHouseDataTypeTranslator::instance()); + conn->setCompatibleWithClickHouse(); LOG_INFO(logger, "executing SQL: DESCRIBE TABLE {}", table); conn->sendQuery(connection_params.timeouts, "DESCRIBE TABLE " + table, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index b7ea1776c6c..bc2fac31724 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -1,21 +1,46 @@ -#include +#include #include +#include +#include "IO/WriteBufferFromString.h" +#include namespace DB -{ + { namespace ExternalTable { -ClickHouseSink::ClickHouseSink(const Block & header, Poco::Logger * logger_) - : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) - , logger(logger_) +ClickHouseSink::ClickHouseSink(const Block & header, const ConnectionParameters & params_, Poco::Logger * logger_) + : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID), params(params_), logger(logger_) { + conn = std::make_unique( + params.host, + params.port, + params.default_database, + params.user, + params.password, + params.quota_key, + "", /*cluster*/ + "", /*cluster_secret*/ + "TimeplusProton", + params.compression, + params.security); + + conn->setCompatibleWithClickHouse(); } void ClickHouseSink::consume(Chunk chunk) { LOG_INFO(logger, "consuming from chunk contains {} rows", chunk.rows()); + 
ASTInsertQuery query{}; + query.setDatabase("default"); + query.setTable("my_first_table"); + + WriteBufferFromOwnString wb {}; + const IAST::FormatSettings fmt_settings {wb, true}; + ASTInsertQuery * query_ptr = &query; + dynamic_cast(query_ptr)->format(fmt_settings); + conn->sendQuery(params.timeouts, wb.str(), {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); } } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index 99515887bb1..77d55fb03da 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -12,13 +12,14 @@ namespace ExternalTable class ClickHouseSink final : public SinkToStorage { public: - ClickHouseSink(const Block & header, const ConnectionParameters & params, const ConnectionTimeouts & timeouts, Poco::Logger * logger_); + ClickHouseSink(const Block & header, const ConnectionParameters & params_, Poco::Logger * logger_); String getName() const override { return "ClickHouseSink"; } void consume(Chunk chunk) override; private: + const ConnectionParameters & params; std::unique_ptr conn; Poco::Logger * logger; From 69c55e9e43d5d1cc292e1645988640e15367f1c0 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sat, 27 Jan 2024 03:28:43 -0800 Subject: [PATCH 07/26] simple insert --- src/Client/Connection.cpp | 6 ++ src/Client/LibClient.cpp | 14 ++-- src/Client/LibClient.h | 4 +- src/Common/IFactoryWithAliases.h | 17 +++-- src/DataTypes/DataTypeDate.cpp | 2 +- src/DataTypes/DataTypeDate32.cpp | 2 + src/DataTypes/DataTypeFactory.cpp | 42 +++++++++++- src/DataTypes/DataTypeFactory.h | 1 + src/DataTypes/DataTypeIPv4andIPv6.cpp | 2 + src/DataTypes/DataTypeObject.cpp | 2 + src/DataTypes/DataTypeUUID.cpp | 2 +- src/DataTypes/DataTypesDecimal.cpp | 10 +-- src/DataTypes/registerDataTypeDateTime.cpp | 6 +- src/Formats/NativeReader.cpp | 8 +-- src/Formats/NativeWriter.cpp | 7 ++ src/Formats/NativeWriter.h | 8 
+++ .../ExternalTable/ClickHouse/ClickHouse.cpp | 6 +- .../ClickHouse/ClickHouseSink.cpp | 64 +++++++++++++++---- .../ExternalTable/ClickHouse/ClickHouseSink.h | 3 +- 19 files changed, 157 insertions(+), 49 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index d261cf45017..6576b2c13b7 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -589,6 +589,10 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) maybe_compressed_out = out; block_out = std::make_unique(*maybe_compressed_out, block.cloneEmpty(), server_revision); + /// proton: starts + if (compatible_with_clickhouse) + block_out->setCompatibleWithClickHouse(); + /// proton: ends } if (scalar) @@ -1106,6 +1110,8 @@ void Connection::setCompatibleWithClickHouse() block_logs_in->setCompatibleWithClickHouse(); if (block_profile_events_in) block_profile_events_in->setCompatibleWithClickHouse(); + if (block_out) + block_out->setCompatibleWithClickHouse(); } /// proton: ends diff --git a/src/Client/LibClient.cpp b/src/Client/LibClient.cpp index 0e33bf5ad6a..a82e5452358 100644 --- a/src/Client/LibClient.cpp +++ b/src/Client/LibClient.cpp @@ -11,7 +11,7 @@ extern const int TIMEOUT_EXCEEDED; extern const int UNKNOWN_PACKET_FROM_SERVER; } -LibClient::LibClient(ConnectionPtr connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_) +LibClient::LibClient(Connection & connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_) : connection(connection_) , timeouts(timeouts_) , context(context_) @@ -26,7 +26,7 @@ void LibClient::executeQuery(String query, const Callbacks & callbacks) { try { - connection->sendQuery( + connection.sendQuery( timeouts, query, {}, @@ -34,7 +34,7 @@ void LibClient::executeQuery(String query, const Callbacks & callbacks) QueryProcessingStage::Complete, nullptr, nullptr, - true); + false); receiveResult(callbacks); @@ -86,7 +86,7 @@ void 
LibClient::receiveResult(const Callbacks & callbacks) /// Poll for changes after a cancellation check, otherwise it never reached /// because of progress updates from server. - if (connection->poll(poll_interval)) + if (connection.poll(poll_interval)) break; } @@ -100,7 +100,7 @@ void LibClient::receiveResult(const Callbacks & callbacks) void LibClient::cancelQuery() { - connection->sendCancel(); + connection.sendCancel(); cancelled = true; } @@ -109,7 +109,7 @@ void LibClient::cancelQuery() /// Output of result is suppressed if query was cancelled. bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks) { - Packet packet = connection->receivePacket(); + Packet packet = connection.receivePacket(); Chunk chunk {}; @@ -167,7 +167,7 @@ bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callb default: throw Exception( - ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); + ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection.getDescription()); } } diff --git a/src/Client/LibClient.h b/src/Client/LibClient.h index 4bd82380456..53315c868dc 100644 --- a/src/Client/LibClient.h +++ b/src/Client/LibClient.h @@ -24,7 +24,7 @@ struct Callbacks class LibClient final { public: - LibClient(ConnectionPtr connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_); + LibClient(Connection & connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_); void executeQuery(String query, const Callbacks & callbacks); void receiveResult(const Callbacks & callbacks); @@ -33,7 +33,7 @@ class LibClient final private: bool receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks); - ConnectionPtr connection; + Connection & connection; ConnectionTimeouts timeouts; std::atomic_bool cancelled {false}; diff --git a/src/Common/IFactoryWithAliases.h 
b/src/Common/IFactoryWithAliases.h index bf9f63ae1bf..791c4fd27e9 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -43,6 +43,14 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> else return name; } + + String getClickHouseAliasFromOrName(const String & name) const + { + if (reversed_clickhouse_names.contains(name)) + return reversed_clickhouse_names.at(name); + else + return name; + } /// proton: ends std::unordered_map case_insensitive_name_mapping; @@ -112,14 +120,9 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> throw Exception(factory_name + ": can't create ClickHouse alias '" + alias_name + "', the real name '" + alias_or_real_name + "' is not registered", ErrorCodes::LOGICAL_ERROR); - String alias_name_lowercase = Poco::toLower(alias_name); - - if (creator_map.count(alias_name) || case_insensitive_creator_map.count(alias_name_lowercase)) - throw Exception( - factory_name + ": the ClickHouse alias name '" + alias_name + "' is already registered as real name", ErrorCodes::LOGICAL_ERROR); - if (!clickhouse_names.emplace(alias_name, real_dict_name).second) throw Exception(factory_name + ": ClickHouse alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); + reversed_clickhouse_names.emplace(real_dict_name, alias_name); } /// proton: ends @@ -190,6 +193,8 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> /// proton: starts /// ClickHouse names map to data_types from previous two maps AliasMap clickhouse_names; + /// For looking up Proton type names from ClickHouse names + AliasMap reversed_clickhouse_names; /// proton: ends }; diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp index 331767e1b16..e7203536181 100644 --- a/src/DataTypes/DataTypeDate.cpp +++ b/src/DataTypes/DataTypeDate.cpp @@ -19,7 +19,7 @@ void registerDataTypeDate(DataTypeFactory & factory) { factory.registerSimpleDataType("date", [] { return 
DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); - // factory.registerClickHouseAlias("Date", "date"); + factory.registerClickHouseAlias("Date", "date"); } } diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp index 02945162d24..dc00020377d 100644 --- a/src/DataTypes/DataTypeDate32.cpp +++ b/src/DataTypes/DataTypeDate32.cpp @@ -18,6 +18,8 @@ void registerDataTypeDate32(DataTypeFactory & factory) { factory.registerSimpleDataType( "date32", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); + + factory.registerClickHouseAlias("Date32", "date32"); } } diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 8da07977328..fb6bf1e0e06 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -32,6 +32,46 @@ DataTypePtr DataTypeFactory::get(TypeIndex type) const { return get(typeIndexToTypeName(type)); } + +String DataTypeFactory::getClickHouseNameFromName(const String & name) const +{ + /// Data type parser can be invoked from coroutines with small stack. + /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers) + /// let's make the threshold significantly lower. + /// It is impractical for user to have complex data types with this depth. 
+ +#if defined(SANITIZER) || !defined(NDEBUG) + static constexpr size_t data_type_max_parse_depth = 150; +#else + static constexpr size_t data_type_max_parse_depth = 300; +#endif + + ParserDataType parser; + ASTPtr ast = parseQuery(parser, name.data(), name.data() + name.size(), "data type", 0, data_type_max_parse_depth); + if (const auto * func = ast->as()) + { + if (func->parameters) + throw Exception("Data type cannot have multiple parenthesized parameters.", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE); + + if (func->arguments) + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type with arguments is not supported yet, got {}.", func->name); + + return getClickHouseAliasFromOrName(func->name); + } + + if (const auto * ident = ast->as()) + { + return getClickHouseAliasFromOrName(ident->name()); + } + + if (const auto * lit = ast->as()) + { + if (lit->value.isNull()) + return "Null"; + } + + throw Exception("Unexpected AST element for data type.", ErrorCodes::UNEXPECTED_AST_STRUCTURE); +} /// proton: ends. DataTypePtr DataTypeFactory::get(const String & full_name/* proton: starts*/, bool compatible_with_clickhouse/* proton: ends*/) const @@ -83,7 +123,7 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr else family_name = getAliasToOrName(family_name_param); - if (endsWith(family_name, "_with_dictionary")) + if (endsWith(family_name, "_with_dictionary")/* proton: starts */ || (compatible_with_clickhouse && endsWith(family_name, "WithDictionary"))/* proton: ends */) { ASTPtr low_cardinality_params = std::make_shared(); String param_name = family_name.substr(0, family_name.size() - strlen("_with_dictionary")); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index eb8b09e0c1e..622b143fe0e 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -33,6 +33,7 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli /// proton: starts. 
DataTypePtr get(TypeIndex type) const; + String getClickHouseNameFromName(const String & name) const; /// proton: ends. DataTypePtr get(const String & full_name/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; diff --git a/src/DataTypes/DataTypeIPv4andIPv6.cpp b/src/DataTypes/DataTypeIPv4andIPv6.cpp index 39e87320425..d6d6937e4ae 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.cpp +++ b/src/DataTypes/DataTypeIPv4andIPv6.cpp @@ -13,7 +13,9 @@ void registerDataTypeIPv4andIPv6(DataTypeFactory & factory) factory.registerSimpleDataType("ipv6", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); factory.registerAlias("inet6", "ipv6", DataTypeFactory::CaseInsensitive); + factory.registerClickHouseAlias("IPv4", "ipv4"); factory.registerClickHouseAlias("INET4", "inet4"); + factory.registerClickHouseAlias("IPv6", "ipv6"); factory.registerClickHouseAlias("INET6", "inet6"); } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 30b5864c1af..7eaa8997a01 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -67,5 +67,7 @@ void registerDataTypeObject(DataTypeFactory & factory) factory.registerSimpleDataType( "json", [] { return std::make_shared("json", false); }, DataTypeFactory::CaseInsensitive); /// factory.registerSimpleDataType("nullable_json", [] { return std::make_shared("json", true); }, DataTypeFactory::CaseInsensitive); + + factory.registerClickHouseAlias("JSON", "json"); } } diff --git a/src/DataTypes/DataTypeUUID.cpp b/src/DataTypes/DataTypeUUID.cpp index 9648f1f6bb8..d47eb6932b2 100644 --- a/src/DataTypes/DataTypeUUID.cpp +++ b/src/DataTypes/DataTypeUUID.cpp @@ -30,7 +30,7 @@ void registerDataTypeUUID(DataTypeFactory & factory) { factory.registerSimpleDataType("uuid", [] { return DataTypePtr(std::make_shared()); }); - factory.registerClickHouseAlias("Uuid", "uuid"); + factory.registerClickHouseAlias("UUID", "uuid"); } } diff --git 
a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 90de5d0f14b..9d5ca6d7c60 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -114,12 +114,12 @@ void registerDataTypeDecimal(DataTypeFactory & factory) /// factory.registerAlias("NUMERIC", "decimal", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("FIXED", "decimal", DataTypeFactory::CaseInsensitive); - // factory.registerClickHouseAlias("Decimal32", "decimal32"); - // factory.registerClickHouseAlias("Decimal64", "decimal64"); - // factory.registerClickHouseAlias("Decimal128", "decimal128"); - // factory.registerClickHouseAlias("Decimal256", "decimal256"); + factory.registerClickHouseAlias("Decimal32", "decimal32"); + factory.registerClickHouseAlias("Decimal64", "decimal64"); + factory.registerClickHouseAlias("Decimal128", "decimal128"); + factory.registerClickHouseAlias("Decimal256", "decimal256"); - // factory.registerClickHouseAlias("Decimal", "decimal"); + factory.registerClickHouseAlias("Decimal", "decimal"); } /// Explicit template instantiations. 
diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 4dee1581a49..679af673758 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -112,9 +112,9 @@ void registerDataTypeDateTime(DataTypeFactory & factory) /// factory.registerAlias("TIMESTAMP", "datetime", DataTypeFactory::CaseInsensitive); - // factory.registerClickHouseAlias("Datetime", "datetime"); - // factory.registerClickHouseAlias("Datetime32", "datetime32"); - // factory.registerClickHouseAlias("Datetime64", "datetime64"); + factory.registerClickHouseAlias("Datetime", "datetime"); + factory.registerClickHouseAlias("Datetime32", "datetime32"); + factory.registerClickHouseAlias("Datetime64", "datetime64"); } } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index c92e6a80c1a..068682a04e3 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -140,7 +140,7 @@ Block NativeReader::read() /// Type String type_name; readStringBinary(type_name, istr); - column.type = data_type_factory.get(type_name/* proton: starts */, !!data_type_translator/* proton: ends */); + column.type = data_type_factory.get(type_name/* proton: starts */, compatible_with_clickhouse/* proton: ends */); setVersionToAggregateFunctions(column.type, true, server_revision); @@ -230,10 +230,4 @@ void NativeReader::updateAvgValueSizeHints(const Block & block) } } -/// proton: starts -void NativeReader::setDataTypeTranslator(IDataTypeTranslator * translator) -{ - data_type_translator = translator; -} -/// proton: ends } diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 60ea9ae1646..381e8a19778 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,7 @@ void NativeWriter::write(const Block & block) index_block.columns.resize(columns); } + const auto & factory = 
DataTypeFactory::instance(); for (size_t i = 0; i < columns; ++i) { /// For the index. @@ -110,6 +112,11 @@ void NativeWriter::write(const Block & block) /// Type String type_name = column.type->getName(); + /// proton: starts + if (compatible_with_clickhouse) + type_name = factory.getClickHouseNameFromName(type_name); + /// proton: ends + writeStringBinary(type_name, ostr); setVersionToAggregateFunctions(column.type, true, client_revision); diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index 6631d2c42b5..d28104ccb8b 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -32,6 +32,10 @@ class NativeWriter static String getContentType() { return "application/octet-stream"; } + /// proton: starts + void setCompatibleWithClickHouse() { compatible_with_clickhouse = true; } + /// proton: end + private: WriteBuffer & ostr; Block header; @@ -40,6 +44,10 @@ class NativeWriter size_t initial_size_of_file; /// The initial size of the data file, if `append` done. Used for the index. /// If you need to write index, then `ostr` must be a CompressedWriteBuffer. 
CompressedWriteBuffer * ostr_concrete = nullptr; + + /// proton: starts + bool compatible_with_clickhouse {false}; + /// proton: ends }; } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 9b0eebe6eed..40dfb324af7 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -64,12 +64,12 @@ void ClickHouse::startup() SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) { - return std::make_shared(metadata_snapshot->getSampleBlock(), connection_params, logger); + return std::make_shared(metadata_snapshot->getSampleBlock(), connection_params, context, logger); } ColumnsDescription ClickHouse::getTableStructure() { - auto conn = std::make_shared( + auto conn = std::make_unique( connection_params.host, connection_params.port, connection_params.default_database, @@ -90,7 +90,7 @@ ColumnsDescription ClickHouse::getTableStructure() ColumnsDescription ret {}; - LibClient client {std::move(conn), connection_params.timeouts, context, logger}; + LibClient client {*conn, connection_params.timeouts, context, logger}; client.receiveResult({ .on_data = [this, &ret](Block & block) { diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index bc2fac31724..bd60ca662c9 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -1,8 +1,8 @@ #include #include #include +#include "Client/LibClient.h" #include "IO/WriteBufferFromString.h" -#include namespace DB { @@ -10,8 +10,15 @@ namespace DB namespace ExternalTable { -ClickHouseSink::ClickHouseSink(const Block & header, const ConnectionParameters & params_, Poco::Logger * logger_) - : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID), params(params_), 
logger(logger_) +ClickHouseSink::ClickHouseSink( + const Block & header, + const ConnectionParameters & params_, + ContextPtr & context_, + Poco::Logger * logger_) + : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) + , params(params_) + , context(context_) + , logger(logger_) { conn = std::make_unique( params.host, @@ -32,15 +39,48 @@ ClickHouseSink::ClickHouseSink(const Block & header, const ConnectionParameters void ClickHouseSink::consume(Chunk chunk) { LOG_INFO(logger, "consuming from chunk contains {} rows", chunk.rows()); - ASTInsertQuery query{}; - query.setDatabase("default"); - query.setTable("my_first_table"); - - WriteBufferFromOwnString wb {}; - const IAST::FormatSettings fmt_settings {wb, true}; - ASTInsertQuery * query_ptr = &query; - dynamic_cast(query_ptr)->format(fmt_settings); - conn->sendQuery(params.timeouts, wb.str(), {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); + LibClient client {*conn, params.timeouts, context, logger}; + + String query = "INSERT INTO my_first_table (user_id, message, timestamp, metric) VALUES "; + for (size_t i = 0; i < chunk.rows(); ++i) + { + const auto & cols = chunk.getColumns(); + Field f {}; + cols[0]->get(i, f); + auto user_id = f.get(); + + cols[1]->get(i, f); + auto message = f.get(); + + cols[2]->get(i, f); + auto ts = f.get(); + + cols[3]->get(i, f); + auto metric = f.get(); + + query.append(fmt::format("({}, {}, {}, {})", user_id, quoteString(message), ts, metric)); + } + + LOG_INFO(logger, "sending query {}", query); + conn->sendQuery(params.timeouts, query, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); + LOG_INFO(logger, "query sent!"); + + client.receiveResult({ + .on_data = [this](Block & block) + { + LOG_INFO(logger, "INSERT INTO returns {} columns and {} rows", block.columns(), block.rows()); + if (!block.rows()) + return; + + const auto & cols = block.getColumns(); + for (size_t i = 0; i < block.rows(); ++i) + { + for (const auto & col : 
block.getColumns()) + LOG_INFO(logger, "row {}: col_name = {}", i, col->getName()); + } + } + }); + LOG_INFO(logger, "consume done!"); } } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index 77d55fb03da..39c313746d9 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -12,7 +12,7 @@ namespace ExternalTable class ClickHouseSink final : public SinkToStorage { public: - ClickHouseSink(const Block & header, const ConnectionParameters & params_, Poco::Logger * logger_); + ClickHouseSink(const Block & header, const ConnectionParameters & params_, ContextPtr & context_, Poco::Logger * logger_); String getName() const override { return "ClickHouseSink"; } @@ -22,6 +22,7 @@ class ClickHouseSink final : public SinkToStorage const ConnectionParameters & params; std::unique_ptr conn; + ContextPtr & context; Poco::Logger * logger; }; From 35aa692b2d45ace1bd1175745b3b08de3759da37 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 04:07:10 -0800 Subject: [PATCH 08/26] SQL format issue --- src/Client/LibClient.cpp | 55 ++--------- src/Client/LibClient.h | 9 +- src/Databases/DatabaseOnDisk.cpp | 13 +++ src/Formats/FormatSettings.h | 3 + .../ParserCreateExternalTableQuery.cpp | 20 +++- .../Formats/Impl/ValuesRowOutputFormat.cpp | 4 + .../ExternalTable/ClickHouse/ClickHouse.cpp | 33 ++----- .../ExternalTable/ClickHouse/ClickHouse.h | 3 +- .../ClickHouse/ClickHouseSink.cpp | 96 ++++++++++++------- .../ExternalTable/ClickHouse/ClickHouseSink.h | 16 +++- .../ExternalTable/StorageExternalTable.cpp | 7 +- .../ExternalTable/StorageExternalTable.h | 1 + 12 files changed, 140 insertions(+), 120 deletions(-) diff --git a/src/Client/LibClient.cpp b/src/Client/LibClient.cpp index a82e5452358..315d4eb567e 100644 --- a/src/Client/LibClient.cpp +++ b/src/Client/LibClient.cpp @@ -11,10 +11,9 @@ extern const int TIMEOUT_EXCEEDED; 
extern const int UNKNOWN_PACKET_FROM_SERVER; } -LibClient::LibClient(Connection & connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_) +LibClient::LibClient(Connection & connection_, ConnectionTimeouts timeouts_, Poco::Logger * logger_) : connection(connection_) , timeouts(timeouts_) - , context(context_) , logger(logger_) {} @@ -146,8 +145,11 @@ bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callb return true; case Protocol::Server::Exception: + server_exception.swap(packet.exception); + if (callbacks.on_receive_exception_from_server) - callbacks.on_receive_exception_from_server(std::move(packet.exception)); + callbacks.on_receive_exception_from_server(*server_exception); + return false; case Protocol::Server::Log: @@ -171,46 +173,9 @@ bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callb } } -// void LibClient::onProgress(const Progress & value) -// { -// LOG_INFO(logger, "onProgress called with read_rows = {}", value.read_rows); -// } -// -// void LibClient::onData(Block & block) -// { -// /// TBD -// } -// -// void LibClient::onLogData(Block & block) { -// LOG_INFO(logger, "onLogData called with columns = {}, rows = {}", block.columns(), block.rows()); -// } -// -// void LibClient::onTotals(Block & block) -// { -// LOG_INFO(logger, "onTotals called with columns = {}, rows = {}", block.columns(), block.rows()); -// } -// -// void LibClient::onExtremes(Block & block) -// { -// LOG_INFO(logger, "onExtremes called with columns = {}, rows = {}", block.columns(), block.rows()); -// } -// -// void LibClient::onReceiveExceptionFromServer(std::unique_ptr && e) -// { -// LOG_INFO(logger, "received server exception: {}", e->what()); -// } -// -// void LibClient::onProfileInfo(const ProfileInfo & profile_info) -// { -// LOG_INFO(logger, "received ProfileInfo: rows={}", profile_info.rows); -// } -// void LibClient::onEndOfStream() -// { -// LOG_INFO(logger, "received EndOfStream"); 
-// } -// void LibClient::onProfileEvents(Block & block) -// { -// LOG_INFO(logger, "received ProfileEvents rows = {}", block.rows()); -// } - +void LibClient::throwServerExceptionIfAny() +{ + if (server_exception) + server_exception->rethrow(); +} } diff --git a/src/Client/LibClient.h b/src/Client/LibClient.h index 53315c868dc..b6c0de47785 100644 --- a/src/Client/LibClient.h +++ b/src/Client/LibClient.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include namespace DB @@ -14,7 +13,7 @@ struct Callbacks std::function on_log_data; std::function on_totals; std::function on_extremes; - std::function && e)> on_receive_exception_from_server; + std::function on_receive_exception_from_server; std::function on_profile_info; std::function on_end_of_stream; std::function on_profile_events; @@ -24,12 +23,14 @@ struct Callbacks class LibClient final { public: - LibClient(Connection & connection_, ConnectionTimeouts timeouts_, ContextPtr & context_, Poco::Logger * logger_); + LibClient(Connection & connection_, ConnectionTimeouts timeouts_, Poco::Logger * logger_); void executeQuery(String query, const Callbacks & callbacks); void receiveResult(const Callbacks & callbacks); void cancelQuery(); + void throwServerExceptionIfAny(); + private: bool receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks); @@ -37,8 +38,8 @@ class LibClient final ConnectionTimeouts timeouts; std::atomic_bool cancelled {false}; + std::unique_ptr server_exception {nullptr}; - ContextPtr & context [[maybe_unused]]; Poco::Logger * logger; }; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 709b643ba24..232d885aa16 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -23,6 +23,7 @@ #include /// proton: starts. +#include #include /// proton: ends. 
@@ -696,6 +697,18 @@ ASTPtr DatabaseOnDisk::parseQueryFromMetadata( auto ast = tryParseQuery(parser, pos, pos + query.size(), error_message, /* hilite = */ false, "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth); + /// proton: starts + if (!ast) + { + ParserCreateExternalTableQuery ex_table_parser; + std::string err_msg; + pos = query.data(); + ast = tryParseQuery(ex_table_parser, pos, pos + query.size(), err_msg, /* hilite = */ false, + "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth); + LOG_ERROR(logger, "Failed to parse {} with CreateExternalTable parser: {}", metadata_file_path, err_msg); + } + /// proton: ends + if (!ast && throw_on_error) throw Exception(error_message, ErrorCodes::SYNTAX_ERROR); else if (!ast) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 73e30f81b8c..08ff2975483 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -230,6 +230,9 @@ struct FormatSettings bool interpret_expressions = true; bool deduce_templates_of_expressions = true; bool accurate_types_of_literals = true; + /// proton: starts + bool no_commas_between_rows = false; + /// proton: ends } values; struct diff --git a/src/Parsers/ParserCreateExternalTableQuery.cpp b/src/Parsers/ParserCreateExternalTableQuery.cpp index 5d67859a9ba..8c0cddd9c3b 100644 --- a/src/Parsers/ParserCreateExternalTableQuery.cpp +++ b/src/Parsers/ParserCreateExternalTableQuery.cpp @@ -11,6 +11,7 @@ namespace DB bool DB::ParserCreateExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected, [[ maybe_unused ]] bool hint) { ParserKeyword s_create("CREATE"); + ParserKeyword s_attach("ATTACH"); ParserKeyword s_or_replace("OR REPLACE"); ParserKeyword s_external_table("EXTERNAL TABLE"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); @@ -22,15 +23,20 @@ bool DB::ParserCreateExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Exp 
ASTPtr table; ASTPtr settings; + bool attach = false; bool or_replace = false; bool if_not_exists = false; - if (!s_create.ignore(pos, expected)) + if (s_create.ignore(pos, expected)) + { + if (s_or_replace.ignore(pos, expected)) + or_replace = true; + } + else if (s_attach.ignore(pos, expected)) + attach = true; + else return false; - if (s_or_replace.ignore(pos, expected)) - or_replace = true; - if (!s_external_table.ignore(pos, expected)) return false; @@ -49,12 +55,18 @@ bool DB::ParserCreateExternalTableQuery::parseImpl(Pos & pos, ASTPtr & node, Exp auto create_query = std::make_shared(); node = create_query; + create_query->is_external = true; create_query->create_or_replace = or_replace; create_query->if_not_exists = if_not_exists; auto * table_id = table->as(); create_query->database = table_id->getDatabase(); create_query->table = table_id->getTable(); + if (attach) + { + create_query->uuid = table_id->uuid; + create_query->attach = attach; + } if (create_query->database) create_query->children.push_back(create_query->database); if (create_query->table) diff --git a/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp b/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp index abe7c42caae..4f9e437f5ac 100644 --- a/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp @@ -37,6 +37,10 @@ void ValuesRowOutputFormat::writeRowEndDelimiter() void ValuesRowOutputFormat::writeRowBetweenDelimiter() { + /// proton: starts + if (format_settings.values.no_commas_between_rows) + return; + /// proton: ends writeCString(",", out); } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 40dfb324af7..68b96ea7c5d 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -11,10 +11,9 @@ namespace DB namespace ExternalTable { -ClickHouse::ClickHouse(ExternalTableSettingsPtr 
settings, ContextPtr & context_) - : table(settings->table.value) - , context(context_) - , logger(&Poco::Logger::get("External-" + settings->address.value + "-" + settings->table.value)) +ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings, ContextPtr & /*context*/) + : table(settings->table.changed ? settings->table.value : name) + , logger(&Poco::Logger::get("ExternalTable-ClickHouse-" + table)) { assert(settings->type.value == "clickhouse"); @@ -41,30 +40,12 @@ ClickHouse::ClickHouse(ExternalTableSettingsPtr settings, ContextPtr & context_) void ClickHouse::startup() { -#ifdef GO_ON_PRODUCTIOn - client.executeQuery("DESCRIBE TABLE " + table, { - .on_data = [this](Block & block) - { - LOG_INFO(logger, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); - auto cols = block.getColumns(); - for (size_t i = 0; i < block.rows(); ++i) - { - String msg = "row " + std::to_string(i) + " :"; - for (const auto & col : cols) - { - msg += col->getName() + ": "; - msg += (*col)[i].getTypeName(); - } - } - } - }); -#endif LOG_INFO(logger, "startup"); } -SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) +SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { - return std::make_shared(metadata_snapshot->getSampleBlock(), connection_params, context, logger); + return std::make_shared(table, metadata_snapshot->getSampleBlock(), connection_params, context, logger); } ColumnsDescription ClickHouse::getTableStructure() @@ -90,7 +71,7 @@ ColumnsDescription ClickHouse::getTableStructure() ColumnsDescription ret {}; - LibClient client {*conn, connection_params.timeouts, context, logger}; + LibClient client {*conn, connection_params.timeouts, logger}; client.receiveResult({ .on_data = [this, &ret](Block & block) { @@ -121,6 +102,8 @@ ColumnsDescription 
ClickHouse::getTableStructure() } }); + client.throwServerExceptionIfAny(); + return ret; } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h index 287b62f01a9..d8c8c5a2f36 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h @@ -13,7 +13,7 @@ namespace ExternalTable class ClickHouse final : public IExternalTable { public: - explicit ClickHouse(ExternalTableSettingsPtr settings, ContextPtr & context_); + explicit ClickHouse(const String & name, ExternalTableSettingsPtr settings, ContextPtr & context [[maybe_unused]]); void startup() override; void shutdown() override {} @@ -26,7 +26,6 @@ class ClickHouse final : public IExternalTable ConnectionParameters connection_params; String table; - ContextPtr & context; Poco::Logger * logger; }; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index bd60ca662c9..13580aeea89 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -1,25 +1,35 @@ #include #include -#include -#include "Client/LibClient.h" -#include "IO/WriteBufferFromString.h" +#include +#include +#include +#include "Interpreters/Context.h" namespace DB - { +{ namespace ExternalTable { ClickHouseSink::ClickHouseSink( - const Block & header, - const ConnectionParameters & params_, - ContextPtr & context_, - Poco::Logger * logger_) + const String & table, + const Block & header, + const ConnectionParameters & params_, + ContextPtr & context_, + Poco::Logger * logger_) : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) , params(params_) , context(context_) , logger(logger_) { + const auto & col_names = header.getNames(); + assert(!col_names.empty()); + + insert_into = "INSERT INTO " + backQuoteIfNeed(table) + " (" + backQuoteIfNeed(col_names[0]); + for (const auto 
& name : std::vector(std::next(col_names.begin()), col_names.end())) + insert_into.append(", " + backQuoteIfNeed(name)); + insert_into.append(") VALUES "); + conn = std::make_unique( params.host, params.port, @@ -34,52 +44,70 @@ ClickHouseSink::ClickHouseSink( params.security); conn->setCompatibleWithClickHouse(); + + // buf = std::make_unique(oss); + buf = std::make_unique(); + auto format_settings = getFormatSettings(context); + format_settings.values.no_commas_between_rows = true; + output_format = FormatFactory::instance().getOutputFormat("Values", *buf, header, context, {}, format_settings); + output_format->setAutoFlush(); + + LOG_INFO(logger, "ClickHouseSink is read to send data to table {}", table); } -void ClickHouseSink::consume(Chunk chunk) +namespace { - LOG_INFO(logger, "consuming from chunk contains {} rows", chunk.rows()); - LibClient client {*conn, params.timeouts, context, logger}; - - String query = "INSERT INTO my_first_table (user_id, message, timestamp, metric) VALUES "; - for (size_t i = 0; i < chunk.rows(); ++i) - { - const auto & cols = chunk.getColumns(); - Field f {}; - cols[0]->get(i, f); - auto user_id = f.get(); - cols[1]->get(i, f); - auto message = f.get(); +class BufferResetter +{ +public: +explicit BufferResetter(WriteBufferFromOwnString & buf_): buf(buf_) {} +~BufferResetter() { buf.restart(); } - cols[2]->get(i, f); - auto ts = f.get(); +private: + WriteBufferFromOwnString & buf; +}; - cols[3]->get(i, f); - auto metric = f.get(); +} - query.append(fmt::format("({}, {}, {}, {})", user_id, quoteString(message), ts, metric)); +void ClickHouseSink::consume(Chunk chunk) +{ + /// Empty chunks are acting heartbeats + if (!chunk.rows()) + { + conn->checkConnected(); /// ping to keep connection alive + return; } - LOG_INFO(logger, "sending query {}", query); - conn->sendQuery(params.timeouts, query, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); + LOG_INFO(logger, "consuming from chunk contains {} rows", chunk.rows()); + 
+ BufferResetter reset_buffer(*buf); /// makes sure buf gets reset afterwards + buf->write(insert_into.data(), insert_into.size()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + output_format->write(block); + + String query_to_sent {buf->buffer().begin(), buf->offset()}; + LOG_INFO(logger, "sending query {}", query_to_sent); + conn->sendQuery(params.timeouts, query_to_sent, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); LOG_INFO(logger, "query sent!"); + LibClient client {*conn, params.timeouts, logger}; client.receiveResult({ - .on_data = [this](Block & block) + .on_data = [this](Block & block_) { - LOG_INFO(logger, "INSERT INTO returns {} columns and {} rows", block.columns(), block.rows()); - if (!block.rows()) + LOG_INFO(logger, "INSERT INTO returns {} columns and {} rows", block_.columns(), block_.rows()); + if (!block_.rows()) return; - const auto & cols = block.getColumns(); - for (size_t i = 0; i < block.rows(); ++i) + const auto & cols = block_.getColumns(); + for (size_t i = 0; i < block_.rows(); ++i) { - for (const auto & col : block.getColumns()) + for (const auto & col : block_.getColumns()) LOG_INFO(logger, "row {}: col_name = {}", i, col->getName()); } } }); + client.throwServerExceptionIfAny(); LOG_INFO(logger, "consume done!"); } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index 39c313746d9..de5cf4d45af 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -1,7 +1,9 @@ #pragma once #include +#include #include +// #include "IO/WriteBufferFromOStream.h" namespace DB { @@ -12,16 +14,28 @@ namespace ExternalTable class ClickHouseSink final : public SinkToStorage { public: - ClickHouseSink(const Block & header, const ConnectionParameters & params_, ContextPtr & context_, Poco::Logger * logger_); + ClickHouseSink( + const String & table, + const Block & 
header, + const ConnectionParameters & params_, + ContextPtr & context_, + Poco::Logger * logger_); String getName() const override { return "ClickHouseSink"; } void consume(Chunk chunk) override; private: + String insert_into; + const ConnectionParameters & params; std::unique_ptr conn; + // std::ostringstream oss; + // std::unique_ptr buf; + std::unique_ptr buf; + OutputFormatPtr output_format; + ContextPtr & context; Poco::Logger * logger; }; diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index a5e0d602924..a3785e21fb6 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -1,10 +1,7 @@ -#include -#include -#include #include #include #include -#include "Storages/ExternalTable/ClickHouse/ClickHouse.h" +#include namespace DB { @@ -19,7 +16,7 @@ StorageExternalTable::StorageExternalTable( if (type == "clickhouse") { auto ctx = getContext(); - external_table = std::make_unique(std::move(settings), ctx); + external_table = std::make_unique(args.table_id.getTableName(), std::move(settings), ctx); } else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown external table type: {}", type); diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index c702c86a30c..4e5f4ea24c4 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -20,6 +20,7 @@ class StorageExternalTable final : public shared_ptr_helperstartup(); } void shutdown() override { external_table->shutdown(); } From 1c7d2a11b526f5a5714590b3702ea1f704e4b0ce Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 12:52:56 -0800 Subject: [PATCH 09/26] fixed empty values --- src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index 13580aeea89..954237edb64 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -86,7 +86,7 @@ void ClickHouseSink::consume(Chunk chunk) auto block = getHeader().cloneWithColumns(chunk.detachColumns()); output_format->write(block); - String query_to_sent {buf->buffer().begin(), buf->offset()}; + String query_to_sent {buf->str()}; LOG_INFO(logger, "sending query {}", query_to_sent); conn->sendQuery(params.timeouts, query_to_sent, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); LOG_INFO(logger, "query sent!"); From cea1665e0ce08bddea7b5ccaf764efc61ed1ee5e Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 13:36:32 -0800 Subject: [PATCH 10/26] more fixes --- .../ClickHouseDataTypeTranslator.cpp | 118 ------------------ src/DataTypes/ClickHouseDataTypeTranslator.h | 23 ---- src/Interpreters/InterpreterCreateQuery.cpp | 7 +- .../ExternalTable/ClickHouse/ClickHouse.cpp | 1 - .../ClickHouse/ClickHouseSink.cpp | 4 +- .../Streaming/StorageMaterializedView.cpp | 3 +- 6 files changed, 8 insertions(+), 148 deletions(-) delete mode 100644 src/DataTypes/ClickHouseDataTypeTranslator.cpp delete mode 100644 src/DataTypes/ClickHouseDataTypeTranslator.h diff --git a/src/DataTypes/ClickHouseDataTypeTranslator.cpp b/src/DataTypes/ClickHouseDataTypeTranslator.cpp deleted file mode 100644 index ea2d11d8a3a..00000000000 --- a/src/DataTypes/ClickHouseDataTypeTranslator.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include - -namespace DB -{ - -ClickHouseDataTypeTranslator & ClickHouseDataTypeTranslator::instance() -{ - static ClickHouseDataTypeTranslator ret; - return ret; -} - -ClickHouseDataTypeTranslator::ClickHouseDataTypeTranslator() -{ - /// referece: DataTypeFactory - type_dict = { - {"UInt8", "uint8"}, - {"UInt16", "uint16"}, - {"UInt32", "uint32"}, - 
{"UInt64", "uint64"}, - - {"Int8", "int8"}, - {"Int16", "int16"}, - {"Int32", "int32"}, - {"Int64", "int64"}, - - {"Float32", "float32"}, - {"Float64", "float64"}, - - {"UInt128", "uint128"}, - {"UInt256", "uint256"}, - - {"Int128", "int128"}, - {"Int256", "int256"}, - - {"BYTE", "byte"}, - {"SMALLINT", "smallint"}, - {"INT", "int"}, - {"UINT", "uint"}, - {"INTEGER", "integer"}, - {"BIGINT", "bigint"}, - {"FLOAT", "float"}, - {"DOUBLE", "double"}, - - {"Decimal32", "decimal32"}, - {"Decimal64", "decimal64"}, - {"Decimal128", "decimal128"}, - {"Decimal256", "decimal256"}, - {"Decimal", "decimal"}, - - {"Date", "date"}, - {"Date32", "date32"}, - - {"DateTime", "datetime"}, - {"DateTime32", "datetime32"}, - {"DateTime64", "datetime64"}, - - {"String", "string"}, - {"VARCHAR", "VARCHAR"}, - - {"FixedString", "fixed_string"}, - - {"Enum8", "enum8"}, - {"Enum16", "enum16"}, - {"Enum", "enum"}, - - {"Array", "array"}, - - {"Tuple", "tuple"}, - - {"Nullable", "nullable"}, - - {"Nothing", "nothing"}, - - {"UUID", "uuid"}, - - {"IPv4", "ipv4"}, - {"INET", "inet"}, - {"IPv6", "ipv6"}, - {"INET6", "inet6"}, - - {"AggregateFunction", "aggregate_function"}, - - {"Nested", "nested"}, - - {"IntervalNanosecond", "interval_nanosecond"}, - {"IntervalMicrosecond", "interval_microsecond"}, - {"IntervalMillisecond", "interval_millisecond"}, - {"IntervalSecond", "interval_second"}, - {"IntervalMinute", "interval_minute"}, - {"IntervalHour", "interval_hour"}, - {"IntervalDay", "interval_day"}, - {"IntervalWeek", "interval_week"}, - {"IntervalMonth", "interval_month"}, - {"IntervalQuarter", "interval_quarter"}, - {"IntervalYear", "interval_year"}, - - {"LowCardinality", "low_cardinality"}, - - {"Bool", "bool"}, - - {"SimpleAggregateFunction", "simple_aggregate_function"}, - - {"Map", "map"}, - - {"JSON", "json"}, - }; -} - -std::string ClickHouseDataTypeTranslator::translate(const std::string & type_name) -{ - auto it = type_dict.find(type_name); - if (it == type_dict.end()) - return 
type_name; - - return it->second; -} - -} diff --git a/src/DataTypes/ClickHouseDataTypeTranslator.h b/src/DataTypes/ClickHouseDataTypeTranslator.h deleted file mode 100644 index 35d8c1872dd..00000000000 --- a/src/DataTypes/ClickHouseDataTypeTranslator.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class ClickHouseDataTypeTranslator final : public IDataTypeTranslator -{ -public: - static ClickHouseDataTypeTranslator & instance(); - - ~ClickHouseDataTypeTranslator() override = default; - - std::string translate(const std::string & type_name) override; - -private: - ClickHouseDataTypeTranslator(); - - std::unordered_map type_dict; -}; - -} diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 9cffb39a70a..fa3572cca56 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -892,6 +892,9 @@ void InterpreterCreateQuery::handleExternalStreamCreation(ASTCreateQuery & creat if (!create.is_external) return; + if (create.storage && create.storage->engine->name == "ExternalTable") + return; + auto sharding_expr_field = Field(""); String sharding_expr; @@ -916,10 +919,6 @@ void InterpreterCreateQuery::handleExternalStreamCreation(ASTCreateQuery & creat create.storage->set(create.storage->engine, makeASTFunction("ExternalStream", sharding_expr_ast)); } - - - if (create.storage->engine->name != "ExternalStream") - throw Exception(ErrorCodes::INCORRECT_QUERY, "External stream requires ExternalStream engine"); } /// proton: ends diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 68b96ea7c5d..87ce8cb9e6a 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp 
b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index 954237edb64..a1bc6e5327f 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -75,7 +75,7 @@ void ClickHouseSink::consume(Chunk chunk) /// Empty chunks are acting heartbeats if (!chunk.rows()) { - conn->checkConnected(); /// ping to keep connection alive + // conn->checkConnected(); /// ping to keep connection alive return; } @@ -88,6 +88,8 @@ void ClickHouseSink::consume(Chunk chunk) String query_to_sent {buf->str()}; LOG_INFO(logger, "sending query {}", query_to_sent); + + conn->forceConnected(params.timeouts); /// The connection chould have been idle for too long conn->sendQuery(params.timeouts, query_to_sent, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); LOG_INFO(logger, "query sent!"); diff --git a/src/Storages/Streaming/StorageMaterializedView.cpp b/src/Storages/Streaming/StorageMaterializedView.cpp index 942e8603402..fed2501ea8b 100644 --- a/src/Storages/Streaming/StorageMaterializedView.cpp +++ b/src/Storages/Streaming/StorageMaterializedView.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -201,7 +202,7 @@ StorageMaterializedView::StorageMaterializedView( if (!target_table) throw Exception(ErrorCodes::INCORRECT_QUERY, "Target stream is not found", target_table_id.getFullTableName()); - if (!target_table->as() && !target_table->as()) + if (!target_table->as() && !target_table->as() && !target_table->as()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MaterializedView doesn't support target storage is {}", target_table->getName()); } } From a7e840e3a34307d9284f69db9b38e04a2dc0fd4f Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 14:58:16 -0800 Subject: [PATCH 11/26] complex data types --- src/DataTypes/DataTypeAggregateFunction.cpp | 4 ++-- src/DataTypes/DataTypeArray.cpp | 4 ++-- .../DataTypeCustomSimpleAggregateFunction.cpp | 6 +++--- 
src/DataTypes/DataTypeEnum.cpp | 8 ++++---- src/DataTypes/DataTypeFactory.cpp | 10 +++++----- src/DataTypes/DataTypeFactory.h | 4 ++-- src/DataTypes/DataTypeFixedString.cpp | 2 +- src/DataTypes/DataTypeLowCardinality.cpp | 4 ++-- src/DataTypes/DataTypeMap.cpp | 4 ++-- src/DataTypes/DataTypeNested.cpp | 4 ++-- src/DataTypes/DataTypeNullable.cpp | 4 ++-- src/DataTypes/DataTypeString.cpp | 2 +- src/DataTypes/DataTypeTuple.cpp | 6 +++--- src/DataTypes/DataTypesDecimal.cpp | 4 ++-- src/DataTypes/DataTypesNumber.cpp | 3 +-- src/DataTypes/registerDataTypeDateTime.cpp | 6 +++--- 16 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 5c0c8e436e3..87119cc4536 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -159,7 +159,7 @@ SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { String function_name; AggregateFunctionPtr function; @@ -228,7 +228,7 @@ static DataTypePtr create(const ASTPtr & arguments) ErrorCodes::BAD_ARGUMENTS); for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i) - argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i])); + argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i], compatible_with_clickhouse)); if (function_name.empty()) throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index cf9ada743e4..5a95dff04ff 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -59,12 +59,12 @@ size_t DataTypeArray::getNumberOfDimensions() const } -static 
DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.size() != 1) throw Exception("array data type family must have exactly one argument - type of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - return std::make_shared(DataTypeFactory::instance().get(arguments->children[0])); + return std::make_shared(DataTypeFactory::instance().get(arguments->children[0], compatible_with_clickhouse)); } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 1bd3268cabe..e9552c9049d 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -68,7 +68,7 @@ String DataTypeCustomSimpleAggregateFunction::getName() const } -static std::pair create(const ASTPtr & arguments) +static std::pair create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { String function_name; AggregateFunctionPtr function; @@ -119,7 +119,7 @@ static std::pair create(const ASTPtr & argum ErrorCodes::BAD_ARGUMENTS); for (size_t i = 1; i < arguments->children.size(); ++i) - argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i])); + argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i], compatible_with_clickhouse)); if (function_name.empty()) throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); @@ -129,7 +129,7 @@ static std::pair create(const ASTPtr & argum DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function); - DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName()); + DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName(), 
compatible_with_clickhouse); if (!function->getReturnType()->equals(*removeLowCardinality(storage_type))) { diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index d651adb3b5e..1c371520124 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -239,7 +239,7 @@ static void autoAssignNumberForEnum(const ASTPtr & arguments) } template -static DataTypePtr createExact(const ASTPtr & arguments) +static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (!arguments || arguments->children.empty()) throw Exception("Data type enum cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); @@ -279,7 +279,7 @@ static DataTypePtr createExact(const ASTPtr & arguments) return std::make_shared(values); } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.empty()) throw Exception("Data type enum cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); @@ -301,10 +301,10 @@ static DataTypePtr create(const ASTPtr & arguments) Int64 value = value_literal->value.get(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) - return createExact(arguments); + return createExact(arguments, compatible_with_clickhouse); } - return createExact(arguments); + return createExact(arguments, compatible_with_clickhouse); } void registerDataTypeEnum(DataTypeFactory & factory) diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index fb6bf1e0e06..ace01ffef18 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -140,7 +140,7 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr return get("low_cardinality", low_cardinality_params); } - return 
findCreatorByName(family_name)(parameters); + return findCreatorByName(family_name)(parameters, compatible_with_clickhouse); } DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const @@ -182,7 +182,7 @@ void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator throw Exception("DataTypeFactory: the data type " + name + " has been provided " " a null constructor", ErrorCodes::LOGICAL_ERROR); - registerDataType(name, [name, creator](const ASTPtr & ast) + registerDataType(name, [name, creator](const ASTPtr & ast/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]]/* proton: ends */) { if (ast) throw Exception("Data type " + name + " cannot have arguments", ErrorCodes::DATA_TYPE_CANNOT_HAVE_ARGUMENTS); @@ -192,9 +192,9 @@ void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness) { - registerDataType(family_name, [creator](const ASTPtr & ast) + registerDataType(family_name, [creator](const ASTPtr & ast/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]]/* proton: ends */) { - auto res = creator(ast); + auto res = creator(ast, compatible_with_clickhouse); res.first->setCustomization(std::move(res.second)); return res.first; @@ -203,7 +203,7 @@ void DataTypeFactory::registerDataTypeCustom(const String & family_name, Creator void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness) { - registerDataTypeCustom(name, [creator](const ASTPtr & /*ast*/) + registerDataTypeCustom(name, [creator](const ASTPtr & /*ast*//* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]]/* proton: ends */) { return creator(); }, case_sensitiveness); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 622b143fe0e..4d349a4731c 
100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -20,12 +20,12 @@ using DataTypePtr = std::shared_ptr; /** Creates a data type by name of data type family and parameters. */ -class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAliases> +class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAliases> { private: using SimpleCreator = std::function; using DataTypesDictionary = std::unordered_map; - using CreatorWithCustom = std::function(const ASTPtr & parameters)>; + using CreatorWithCustom = std::function(const ASTPtr & parameters, bool compatible_with_clickhouse [[maybe_unused]])>; using SimpleCreatorWithCustom = std::function()>; public: diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 53cca18e8e2..54f9a493d58 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -44,7 +44,7 @@ SerializationPtr DataTypeFixedString::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (!arguments || arguments->children.size() != 1) throw Exception("The fixed_string data type family must have exactly one argument - size in bytes", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index e134c99d777..aeb788107ce 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -150,13 +150,13 @@ SerializationPtr DataTypeLowCardinality::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || 
arguments->children.size() != 1) throw Exception("The low_cardinality data type family must have single argument - type of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - return std::make_shared(DataTypeFactory::instance().get(arguments->children[0])); + return std::make_shared(DataTypeFactory::instance().get(arguments->children[0], compatible_with_clickhouse)); } void registerDataTypeLowCardinality(DataTypeFactory & factory) diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index d425ec1e54f..fd4aa700f97 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -127,7 +127,7 @@ bool DataTypeMap::checkKeyType(DataTypePtr key_type) return true; } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.size() != 2) throw Exception("The map data type family must have two arguments: key and value types", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -136,7 +136,7 @@ static DataTypePtr create(const ASTPtr & arguments) nested_types.reserve(arguments->children.size()); for (const ASTPtr & child : arguments->children) - nested_types.emplace_back(DataTypeFactory::instance().get(child)); + nested_types.emplace_back(DataTypeFactory::instance().get(child, compatible_with_clickhouse)); return std::make_shared(nested_types); } diff --git a/src/DataTypes/DataTypeNested.cpp b/src/DataTypes/DataTypeNested.cpp index 04626be209b..46a6d708d11 100644 --- a/src/DataTypes/DataTypeNested.cpp +++ b/src/DataTypes/DataTypeNested.cpp @@ -32,7 +32,7 @@ String DataTypeNestedCustomName::getName() const return s.str(); } -static std::pair create(const ASTPtr & arguments) +static std::pair create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.empty()) throw Exception("The 
nested cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); @@ -48,7 +48,7 @@ static std::pair create(const ASTPtr & argum if (!name_type) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type Nested accepts only pairs with name and type"); - auto nested_type = DataTypeFactory::instance().get(name_type->type); + auto nested_type = DataTypeFactory::instance().get(name_type->type, compatible_with_clickhouse); nested_types.push_back(std::move(nested_type)); nested_names.push_back(name_type->name); } diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index c8dbe17070a..a598d04e87d 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -61,12 +61,12 @@ SerializationPtr DataTypeNullable::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.size() != 1) throw Exception("Nullable data type family must have exactly one argument - nested type", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypePtr nested_type = DataTypeFactory::instance().get(arguments->children[0]); + DataTypePtr nested_type = DataTypeFactory::instance().get(arguments->children[0], compatible_with_clickhouse); return std::make_shared(nested_type); } diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 3c914e6a545..0bedb7d2c52 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -39,7 +39,7 @@ SerializationPtr DataTypeString::doGetDefaultSerialization() const return std::make_shared(); } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (arguments && !arguments->children.empty()) { diff --git 
a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 926f472021c..edad7ec596b 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -339,7 +339,7 @@ SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.empty()) throw Exception("The tuple cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); @@ -354,11 +354,11 @@ static DataTypePtr create(const ASTPtr & arguments) { if (const auto * name_and_type_pair = child->as()) { - nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type)); + nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type, compatible_with_clickhouse)); names.emplace_back(name_and_type_pair->name); } else - nested_types.emplace_back(DataTypeFactory::instance().get(child)); + nested_types.emplace_back(DataTypeFactory::instance().get(child, compatible_with_clickhouse)); } if (names.empty()) diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 9d5ca6d7c60..3064ab9f22c 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -65,7 +65,7 @@ SerializationPtr DataTypeDecimal::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (!arguments || arguments->children.size() != 2) throw Exception("The decimal data type family must have exactly two arguments: precision and scale", @@ -85,7 +85,7 @@ static DataTypePtr create(const ASTPtr & arguments) } template -static DataTypePtr createExact(const ASTPtr & arguments) +static DataTypePtr createExact(const 
ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (!arguments || arguments->children.size() != 1) throw Exception("The decimal data type family must have exactly two arguments: precision and scale", diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index fa9a88002b4..e5a64e3924b 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes } template -static DataTypePtr createNumericDataType(const ASTPtr & arguments) +static DataTypePtr createNumericDataType(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (arguments) { @@ -32,7 +32,6 @@ static DataTypePtr createNumericDataType(const ASTPtr & arguments) return std::make_shared>(); } - void registerDataTypeNumbers(DataTypeFactory & factory) { factory.registerDataType("uint8", createNumericDataType); diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 679af673758..4da3db21979 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -58,7 +58,7 @@ getArgument(const ASTPtr & arguments, size_t argument_index, const char * argume return argument->value.get(); } -static DataTypePtr create(const ASTPtr & arguments) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]]/* proton: ends */) { if (!arguments || arguments->children.empty()) return std::make_shared(); @@ -77,7 +77,7 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(timezone.value_or(String{})); } -static DataTypePtr create32(const ASTPtr & arguments) +static DataTypePtr create32(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (!arguments || 
arguments->children.empty()) return std::make_shared(); @@ -90,7 +90,7 @@ static DataTypePtr create32(const ASTPtr & arguments) return std::make_shared(timezone); } -static DataTypePtr create64(const ASTPtr & arguments) +static DataTypePtr create64(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) { if (!arguments || arguments->children.empty()) return std::make_shared(DataTypeDateTime64::default_scale); From f3b3e19b9d19a03bd8a4f20ebc9d1f615b95cadb Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 15:10:18 -0800 Subject: [PATCH 12/26] regression: create external stream --- src/Interpreters/InterpreterCreateQuery.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index fa3572cca56..63a9dfec8c0 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -892,7 +892,9 @@ void InterpreterCreateQuery::handleExternalStreamCreation(ASTCreateQuery & creat if (!create.is_external) return; - if (create.storage && create.storage->engine->name == "ExternalTable") + if (create.storage + && create.storage->engine + && create.storage->engine->name == "ExternalTable") return; auto sharding_expr_field = Field(""); From bdecdfe6bda9f87460617e7fed78086f34afab4c Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 15:37:44 -0800 Subject: [PATCH 13/26] cleanup --- src/Client/LibClient.h | 4 +-- .../ExternalTable/ClickHouse/ClickHouse.cpp | 15 ++++----- .../ClickHouse/ClickHouseSink.cpp | 33 +++---------------- .../ExternalTable/ClickHouse/ClickHouseSink.h | 3 -- 4 files changed, 12 insertions(+), 43 deletions(-) diff --git a/src/Client/LibClient.h b/src/Client/LibClient.h index b6c0de47785..ac274bcc1eb 100644 --- a/src/Client/LibClient.h +++ b/src/Client/LibClient.h @@ -25,8 +25,8 @@ class LibClient final public: 
LibClient(Connection & connection_, ConnectionTimeouts timeouts_, Poco::Logger * logger_); - void executeQuery(String query, const Callbacks & callbacks); - void receiveResult(const Callbacks & callbacks); + void executeQuery(String query, const Callbacks & callbacks = {}); + void receiveResult(const Callbacks & callbacks = {}); void cancelQuery(); void throwServerExceptionIfAny(); diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 87ce8cb9e6a..e45550a957f 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -31,9 +31,10 @@ ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings, C connection_params.password = settings->password.value; connection_params.default_database = "default"; connection_params.timeouts = { - 10 * 60 * 1'000'000 /*connection_timeout_*/, - 10 * 60 * 1'000'000 /*send_timeout_*/, - 10 * 60 * 1'000'000 /*receive_timeout_*/ + /*connection_timeout_=*/ 1 * 60 * 1'000'000, + /*send_timeout_=*/ 1 * 60 * 1'000'000, + /*receive_timeout_=*/ 1 * 60 * 1'000'000, + /*tcp_keep_alive_timeout_=*/ 10 * 60 * 1'000'000 }; } @@ -64,17 +65,15 @@ ColumnsDescription ClickHouse::getTableStructure() conn->setCompatibleWithClickHouse(); - LOG_INFO(logger, "executing SQL: DESCRIBE TABLE {}", table); + LOG_INFO(logger, "DESCRIBE TABLE {}", table); conn->sendQuery(connection_params.timeouts, "DESCRIBE TABLE " + table, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); - LOG_INFO(logger, "receiving data"); ColumnsDescription ret {}; LibClient client {*conn, connection_params.timeouts, logger}; client.receiveResult({ - .on_data = [this, &ret](Block & block) + .on_data = [&ret](Block & block) { - LOG_INFO(logger, "DESCRIBE TABLE returns {} columns and {} rows", block.columns(), block.rows()); if (!block.rows()) return; @@ -95,14 +94,12 @@ ColumnsDescription ClickHouse::getTableStructure() 
const auto & col = block.getByName("comment"); col_desc.comment = col.column->getDataAt(i).toString(); } - LOG_INFO(logger, "row {}: col_name = {}, col_type = {}", i, col_desc.name, col_desc.type); ret.add(col_desc, String(), false, false); } } }); client.throwServerExceptionIfAny(); - return ret; } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index a1bc6e5327f..59a125dd118 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -1,9 +1,9 @@ #include -#include #include #include +#include #include -#include "Interpreters/Context.h" +#include namespace DB { @@ -45,14 +45,13 @@ ClickHouseSink::ClickHouseSink( conn->setCompatibleWithClickHouse(); - // buf = std::make_unique(oss); buf = std::make_unique(); auto format_settings = getFormatSettings(context); format_settings.values.no_commas_between_rows = true; output_format = FormatFactory::instance().getOutputFormat("Values", *buf, header, context, {}, format_settings); output_format->setAutoFlush(); - LOG_INFO(logger, "ClickHouseSink is read to send data to table {}", table); + LOG_INFO(logger, "ready to send data to ClickHouse table {} with {}", table, insert_into); } namespace @@ -72,14 +71,8 @@ explicit BufferResetter(WriteBufferFromOwnString & buf_): buf(buf_) {} void ClickHouseSink::consume(Chunk chunk) { - /// Empty chunks are acting heartbeats if (!chunk.rows()) - { - // conn->checkConnected(); /// ping to keep connection alive return; - } - - LOG_INFO(logger, "consuming from chunk contains {} rows", chunk.rows()); BufferResetter reset_buffer(*buf); /// makes sure buf gets reset afterwards buf->write(insert_into.data(), insert_into.size()); @@ -87,30 +80,12 @@ void ClickHouseSink::consume(Chunk chunk) output_format->write(block); String query_to_sent {buf->str()}; - LOG_INFO(logger, "sending query {}", query_to_sent); - 
conn->forceConnected(params.timeouts); /// The connection chould have been idle for too long conn->sendQuery(params.timeouts, query_to_sent, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); - LOG_INFO(logger, "query sent!"); LibClient client {*conn, params.timeouts, logger}; - client.receiveResult({ - .on_data = [this](Block & block_) - { - LOG_INFO(logger, "INSERT INTO returns {} columns and {} rows", block_.columns(), block_.rows()); - if (!block_.rows()) - return; - - const auto & cols = block_.getColumns(); - for (size_t i = 0; i < block_.rows(); ++i) - { - for (const auto & col : block_.getColumns()) - LOG_INFO(logger, "row {}: col_name = {}", i, col->getName()); - } - } - }); + client.receiveResult(); client.throwServerExceptionIfAny(); - LOG_INFO(logger, "consume done!"); } } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index de5cf4d45af..ce27859de02 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -3,7 +3,6 @@ #include #include #include -// #include "IO/WriteBufferFromOStream.h" namespace DB { @@ -31,8 +30,6 @@ class ClickHouseSink final : public SinkToStorage const ConnectionParameters & params; std::unique_ptr conn; - // std::ostringstream oss; - // std::unique_ptr buf; std::unique_ptr buf; OutputFormatPtr output_format; From d185a06cc521172d1f5fefaa8e97ebce99cc3c04 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Sun, 28 Jan 2024 19:04:47 -0800 Subject: [PATCH 14/26] secure connection --- src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp | 3 ++- src/Storages/ExternalTable/ExternalTableSettings.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index e45550a957f..4c4a260bff2 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ 
b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -29,7 +29,8 @@ ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings, C connection_params.port = port; connection_params.user = settings->user.value; connection_params.password = settings->password.value; - connection_params.default_database = "default"; + connection_params.default_database = settings->database.value; + connection_params.security = settings->secure.value ? Protocol::Secure::Enable : Protocol::Secure::Disable; connection_params.timeouts = { /*connection_timeout_=*/ 1 * 60 * 1'000'000, /*send_timeout_=*/ 1 * 60 * 1'000'000, diff --git a/src/Storages/ExternalTable/ExternalTableSettings.h b/src/Storages/ExternalTable/ExternalTableSettings.h index 5f76c9179b8..cc00f1b281b 100644 --- a/src/Storages/ExternalTable/ExternalTableSettings.h +++ b/src/Storages/ExternalTable/ExternalTableSettings.h @@ -11,8 +11,10 @@ class ASTStorage; M(String, type, "", "External table type", 0) \ /* ClickHouse settings */ \ M(String, address, "", "The address of the ClickHouse server to connect", 0) \ - M(String, user, "", "The user to be used to connect to the ClickHouse server", 0) \ + M(String, user, "default", "The user to be used to connect to the ClickHouse server", 0) \ M(String, password, "", "The password to be used to connect to the ClickHouse server", 0) \ + M(Bool, secure, false, "Indicates if it uses TLS connection", 0) \ + M(String, database, "default", "The datababse to connect to", 0) \ M(String, table, "", "The ClickHouse table to which the external table is mapped", 0) DECLARE_SETTINGS_TRAITS(ExternalTableSettingsTraits, LIST_OF_EXTERNAL_TABLE_SETTINGS) From 605802ed55dbf5d0f303f656700e5155388c0ba3 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Mon, 29 Jan 2024 16:36:56 -0800 Subject: [PATCH 15/26] source --- src/Client/LibClient.cpp | 131 ++++++++++-------- src/Client/LibClient.h | 38 +++-- src/Processors/ProcessorID.h | 3 + .../ExternalTable/ClickHouse/ClickHouse.cpp | 102 
++++++++------ .../ExternalTable/ClickHouse/ClickHouse.h | 9 ++ .../ClickHouse/ClickHouseSink.cpp | 29 ++-- .../ExternalTable/ClickHouse/ClickHouseSink.h | 4 +- .../ClickHouse/ClickHouseSource.cpp | 58 ++++++++ .../ClickHouse/ClickHouseSource.h | 37 +++++ .../ExternalTable/ExternalTableImpl.h | 19 ++- 10 files changed, 293 insertions(+), 137 deletions(-) create mode 100644 src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp create mode 100644 src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h diff --git a/src/Client/LibClient.cpp b/src/Client/LibClient.cpp index 315d4eb567e..e3c0cbc2af6 100644 --- a/src/Client/LibClient.cpp +++ b/src/Client/LibClient.cpp @@ -1,5 +1,5 @@ +#include #include -#include "Processors/Chunk.h" namespace DB { @@ -11,32 +11,69 @@ extern const int TIMEOUT_EXCEEDED; extern const int UNKNOWN_PACKET_FROM_SERVER; } -LibClient::LibClient(Connection & connection_, ConnectionTimeouts timeouts_, Poco::Logger * logger_) - : connection(connection_) - , timeouts(timeouts_) +namespace +{ + +std::unique_ptr createConnection(const ConnectionParameters & parameters) +{ + return std::make_unique( + parameters.host, + parameters.port, + parameters.default_database, + parameters.user, + parameters.password, + parameters.quota_key, + "", /* cluster */ + "", /* cluster_secret */ + "TimeplusProton", + parameters.compression, + parameters.security); +} + +size_t calculatePollInterval(const ConnectionTimeouts & timeouts) +{ + const auto & receive_timeout = timeouts.receive_timeout; + constexpr size_t default_poll_interval = 1'000'000; /// in microseconds + constexpr size_t min_poll_interval = 5'000; /// in microseconds + return std::max(min_poll_interval, std::min(receive_timeout.totalMicroseconds(), default_poll_interval)); +} + +} + +LibClient::LibClient(ConnectionParameters params_, Poco::Logger * logger_) + : params(params_) + , connection(createConnection(params)) + , poll_interval(calculatePollInterval(params.timeouts)) , logger(logger_) -{} +{ 
+} -void LibClient::executeQuery(String query, const Callbacks & callbacks) +void LibClient::reset() { - size_t processed_rows {0}; + cancelled = false; + processed_rows = 0; + server_exception = nullptr; +} + +void LibClient::executeQuery(const String & query, const String & query_id) +{ + reset(); + int retries_left = 10; while (retries_left) { try { - connection.sendQuery( - timeouts, + connection->sendQuery( + params.timeouts, query, {}, - "", + query_id, QueryProcessingStage::Complete, nullptr, nullptr, false); - receiveResult(callbacks); - break; } catch (const Exception & e) @@ -51,33 +88,22 @@ void LibClient::executeQuery(String query, const Callbacks & callbacks) } } -/// Receives and processes packets coming from server. -/// Also checks if query execution should be cancelled. -void LibClient::receiveResult(const Callbacks & callbacks) +std::optional LibClient::pollData() { - const auto receive_timeout = timeouts.receive_timeout; - constexpr size_t default_poll_interval = 1000000; /// in microseconds - constexpr size_t min_poll_interval = 5000; /// in microseconds - const size_t poll_interval - = std::max(min_poll_interval, std::min(receive_timeout.totalMicroseconds(), default_poll_interval)); - while (true) { Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE); while (true) { - /// Has the Ctrl+C been pressed and thus the query should be cancelled? - /// If this is the case, inform the server about it and receive the remaining packets - /// to avoid losing sync. if (!cancelled) { double elapsed = receive_watch.elapsedSeconds(); - if (elapsed > receive_timeout.totalSeconds()) + if (elapsed > params.timeouts.receive_timeout.totalSeconds()) { cancelQuery(); - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded while receiving data from server. Waited for {} seconds, timeout is {} seconds", static_cast(elapsed), receive_timeout.totalSeconds()); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded while receiving data from server. 
Waited for {} seconds, timeout is {} seconds", static_cast(elapsed), params.timeouts.receive_timeout.totalSeconds()); } } @@ -85,32 +111,29 @@ void LibClient::receiveResult(const Callbacks & callbacks) /// Poll for changes after a cancellation check, otherwise it never reached /// because of progress updates from server. - if (connection.poll(poll_interval)) + if (connection->poll(poll_interval)) break; } - if (!receiveAndProcessPacket(cancelled, callbacks)) - break; - } + if (!receiveAndProcessPacket()) + return std::nullopt; - if (cancelled) - LOG_INFO(logger, "Query was cancelled."); + return std::move(next_data); + } } void LibClient::cancelQuery() { - connection.sendCancel(); + LOG_INFO(logger, "Query was cancelled."); + connection->sendCancel(); cancelled = true; } /// Receive a part of the result, or progress info or an exception and process it. /// Returns true if one should continue receiving packets. -/// Output of result is suppressed if query was cancelled. -bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks) +bool LibClient::receiveAndProcessPacket() { - Packet packet = connection.receivePacket(); - - Chunk chunk {}; + Packet packet = connection->receivePacket(); switch (packet.type) { @@ -118,58 +141,43 @@ bool LibClient::receiveAndProcessPacket(bool cancelled_, const Callbacks & callb return true; case Protocol::Server::Data: - if (!cancelled_) - callbacks.on_data(packet.block); + next_data = std::move(packet.block); return true; case Protocol::Server::Progress: - if (callbacks.on_progress) - callbacks.on_progress(packet.progress); + // on_progress(packet.progress); return true; case Protocol::Server::ProfileInfo: - if (callbacks.on_profile_info) - callbacks.on_profile_info(packet.profile_info); + // on_profile_info(packet.profile_info); return true; case Protocol::Server::Totals: - if (!cancelled_) - if (callbacks.on_totals) - callbacks.on_totals(packet.block); + // on_totals(packet.block); return true; case 
Protocol::Server::Extremes: - if (!cancelled_) - if (callbacks.on_extremes) - callbacks.on_extremes(packet.block); + // on_extremes(packet.block); return true; case Protocol::Server::Exception: server_exception.swap(packet.exception); - - if (callbacks.on_receive_exception_from_server) - callbacks.on_receive_exception_from_server(*server_exception); - return false; case Protocol::Server::Log: - if (callbacks.on_log_data) - callbacks.on_log_data(packet.block); + /// on_server_log(packet.block); return true; case Protocol::Server::EndOfStream: - if (callbacks.on_end_of_stream) - callbacks.on_end_of_stream(); return false; case Protocol::Server::ProfileEvents: - if (callbacks.on_profile_events) - callbacks.on_profile_events(packet.block); + /// on_profile_event(packet.block); return true; default: throw Exception( - ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection.getDescription()); + ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); } } @@ -178,4 +186,5 @@ void LibClient::throwServerExceptionIfAny() if (server_exception) server_exception->rethrow(); } + } diff --git a/src/Client/LibClient.h b/src/Client/LibClient.h index ac274bcc1eb..124b60558f2 100644 --- a/src/Client/LibClient.h +++ b/src/Client/LibClient.h @@ -1,43 +1,39 @@ #pragma once -#include -#include +#include +#include namespace DB { -struct Callbacks -{ - std::function on_progress; - std::function on_data; - std::function on_log_data; - std::function on_totals; - std::function on_extremes; - std::function on_receive_exception_from_server; - std::function on_profile_info; - std::function on_end_of_stream; - std::function on_profile_events; -}; - /// LibClient is for using as a library client without all the complexities for handling terminal stuff like ClientBase does. +/// This is not thread-safe. 
class LibClient final { public: - LibClient(Connection & connection_, ConnectionTimeouts timeouts_, Poco::Logger * logger_); - void executeQuery(String query, const Callbacks & callbacks = {}); - void receiveResult(const Callbacks & callbacks = {}); + LibClient(ConnectionParameters params_, Poco::Logger * logger_); + + /// Sends the query to the server. + void executeQuery(const String & query, const String & query_id = ""); + /// Cancels the currently running query, does nothing if there is no queries running. void cancelQuery(); + /// Polls data for a query previously sent with `executeQuery`. When no more data are available, the returned optional will be empty. + std::optional pollData(); void throwServerExceptionIfAny(); private: - bool receiveAndProcessPacket(bool cancelled_, const Callbacks & callbacks); + bool receiveAndProcessPacket(); + void reset(); - Connection & connection; - ConnectionTimeouts timeouts; + ConnectionParameters params; + std::unique_ptr connection; + size_t poll_interval; std::atomic_bool cancelled {false}; + size_t processed_rows {0}; + Block next_data; std::unique_ptr server_exception {nullptr}; Poco::Logger * logger; diff --git a/src/Processors/ProcessorID.h b/src/Processors/ProcessorID.h index e044e22670c..68279452cfe 100644 --- a/src/Processors/ProcessorID.h +++ b/src/Processors/ProcessorID.h @@ -247,6 +247,9 @@ enum class ProcessorID : UInt32 GenerateRandomSourceID = 10'045, SourceFromQueryPipelineID = 10'046, ConvertingAggregatedToChunksSourceShuffledID = 10'047, + /// proton: starts + ClickHouseSourceID = 11'000, + /// proton: ends /// Sink Processors EmptySinkID = 20'000, diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 4c4a260bff2..5ccfbbfc841 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -44,6 
+45,20 @@ void ClickHouse::startup() LOG_INFO(logger, "startup"); } +Pipe ClickHouse::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t /*max_block_size*/, + size_t /*num_streams*/) +{ + auto client = std::make_unique(connection_params, logger); + auto source = std::make_unique(std::move(client), column_names, processed_stage, context); + return {source}; +} + SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { return std::make_shared(table, metadata_snapshot->getSampleBlock(), connection_params, context, logger); @@ -51,54 +66,59 @@ SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetad ColumnsDescription ClickHouse::getTableStructure() { - auto conn = std::make_unique( - connection_params.host, - connection_params.port, - connection_params.default_database, - connection_params.user, - connection_params.password, - connection_params.quota_key, - "", /*cluster*/ - "", /*cluster_secret*/ - "TimeplusProton", - connection_params.compression, - connection_params.security); - - conn->setCompatibleWithClickHouse(); - - LOG_INFO(logger, "DESCRIBE TABLE {}", table); - conn->sendQuery(connection_params.timeouts, "DESCRIBE TABLE " + table, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); + // auto conn = std::make_unique( + // connection_params.host, + // connection_params.port, + // connection_params.default_database, + // connection_params.user, + // connection_params.password, + // connection_params.quota_key, + // "", /*cluster*/ + // "", /*cluster_secret*/ + // "TimeplusProton", + // connection_params.compression, + // connection_params.security); + // + // conn->setCompatibleWithClickHouse(); + + // LOG_INFO(logger, "DESCRIBE TABLE {}", table); + // conn->sendQuery(connection_params.timeouts, "DESCRIBE TABLE " + 
table, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); ColumnsDescription ret {}; - LibClient client {*conn, connection_params.timeouts, logger}; - client.receiveResult({ - .on_data = [&ret](Block & block) + LibClient client {connection_params, logger}; + LOG_INFO(logger, "DESCRIBE TABLE {}", table); + client.executeQuery("DESCRIBE TABLE " + table); + while (true) + { + const auto & block = client.pollData(); + if (!block) + break; + + auto rows = block->rows(); + if (!rows) + continue; + + const auto & cols = block.value().getColumns(); + const auto & factory = DataTypeFactory::instance(); + for (size_t i = 0; i < rows; ++i) { - if (!block.rows()) - return; - - const auto & cols = block.getColumns(); - const auto & factory = DataTypeFactory::instance(); - for (size_t i = 0; i < block.rows(); ++i) + ColumnDescription col_desc {}; + { + const auto & col = block->getByName("name"); + col_desc.name = col.column->getDataAt(i).toString(); + } + { + const auto & col = block->getByName("type"); + col_desc.type = factory.get(col.column->getDataAt(i).toString(), true); + } { - ColumnDescription col_desc {}; - { - const auto & col = block.getByName("name"); - col_desc.name = col.column->getDataAt(i).toString(); - } - { - const auto & col = block.getByName("type"); - col_desc.type = factory.get(col.column->getDataAt(i).toString(), true); - } - { - const auto & col = block.getByName("comment"); - col_desc.comment = col.column->getDataAt(i).toString(); - } - ret.add(col_desc, String(), false, false); + const auto & col = block->getByName("comment"); + col_desc.comment = col.column->getDataAt(i).toString(); } + ret.add(col_desc, String(), false, false); } - }); + } client.throwServerExceptionIfAny(); return ret; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h index d8c8c5a2f36..8b7b0c2f7e5 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h +++ 
b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h @@ -20,6 +20,15 @@ class ClickHouse final : public IExternalTable ColumnsDescription getTableStructure() override; + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t /*num_streams*/) override; + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; private: diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index 59a125dd118..6afa6c2d011 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -11,25 +11,36 @@ namespace DB namespace ExternalTable { +namespace +{ + +String constructInsertQuery(const String & table, const Block & header) +{ + assert(header.columns()); + const auto & col_names = header.getNames(); + + auto query = "INSERT INTO " + backQuoteIfNeed(table) + " (" + backQuoteIfNeed(col_names[0]); + for (const auto & name : std::vector(std::next(col_names.begin()), col_names.end())) + query.append(", " + backQuoteIfNeed(name)); + query.append(") VALUES "); + + return query; +} + +} + ClickHouseSink::ClickHouseSink( const String & table, const Block & header, const ConnectionParameters & params_, - ContextPtr & context_, + ContextPtr context_, Poco::Logger * logger_) : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) + , insert_into(constructInsertQuery(table, header)) , params(params_) , context(context_) , logger(logger_) { - const auto & col_names = header.getNames(); - assert(!col_names.empty()); - - insert_into = "INSERT INTO " + backQuoteIfNeed(table) + " (" + backQuoteIfNeed(col_names[0]); - for (const auto & name : std::vector(std::next(col_names.begin()), col_names.end())) - 
insert_into.append(", " + backQuoteIfNeed(name)); - insert_into.append(") VALUES "); - conn = std::make_unique( params.host, params.port, diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index ce27859de02..ed4abbc9825 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -17,7 +17,7 @@ class ClickHouseSink final : public SinkToStorage const String & table, const Block & header, const ConnectionParameters & params_, - ContextPtr & context_, + ContextPtr context_, Poco::Logger * logger_); String getName() const override { return "ClickHouseSink"; } @@ -33,7 +33,7 @@ class ClickHouseSink final : public SinkToStorage std::unique_ptr buf; OutputFormatPtr output_format; - ContextPtr & context; + ContextPtr context; Poco::Logger * logger; }; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp new file mode 100644 index 00000000000..60b11445894 --- /dev/null +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp @@ -0,0 +1,58 @@ +#include +#include + +namespace DB +{ + +namespace ExternalTable +{ + +namespace +{ +String constructSelectQuery(const String & table, const Block & header) +{ + assert(header.columns()); + const auto & col_names = header.getNames(); + + auto query = "SELECT " + backQuoteIfNeed(col_names[0]); + for (const auto & name : std::vector(std::next(col_names.begin()), col_names.end())) + query.append(", " + backQuoteIfNeed(name)); + query.append(" FROM " + table); + + return query; +} + +} + +ClickHouseSource::ClickHouseSource( + const String & table, + const Block & header, + std::unique_ptr client_, + ContextPtr context_, + Poco::Logger * logger_) + : ISource(header, true, ProcessorID::ClickHouseSourceID) + , client(std::move(client_)) + , query(constructSelectQuery(table, header)) + , context(context_) + , 
logger(logger_) +{ +} + +Chunk ClickHouseSource::generate() +{ + // if (isCancelled()) + // { + // } + + /// TODO re-design the client API to provide a function to poll data instead of using callbacks. + client->executeQuery(query, { + .on_data = [](Block & blk) + { + } + }); + return {}; +} + +} + +} diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h new file mode 100644 index 00000000000..5c2b077ef48 --- /dev/null +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ExternalTable +{ + +class ClickHouseSource final : public ISource +{ +public: + ClickHouseSource( + const String & table, + const Block & header, + std::unique_ptr client_, + ContextPtr context_, + Poco::Logger * logger_); + + String getName() const override { return "ClickHouseSource"; } + +protected: + Chunk generate() override; + +private: + std::unique_ptr client; + String query; + + ContextPtr context; + Poco::Logger * logger; +}; + +} + +} diff --git a/src/Storages/ExternalTable/ExternalTableImpl.h b/src/Storages/ExternalTable/ExternalTableImpl.h index 6141bc5e37c..4f014a87f95 100644 --- a/src/Storages/ExternalTable/ExternalTableImpl.h +++ b/src/Storages/ExternalTable/ExternalTableImpl.h @@ -1,6 +1,7 @@ #pragma once #include +#include "QueryPipeline/Pipe.h" namespace DB { @@ -16,10 +17,22 @@ class IExternalTable virtual ColumnsDescription getTableStructure() = 0; + virtual Pipe read( + const Names & /*column_names*/, + const StorageSnapshotPtr & /*storage_snapshot*/, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading data from this type of external table is not supported"); + } + virtual SinkToStoragePtr write(const ASTPtr & /* query */, const 
StorageMetadataPtr & /* metadata_snapshot */, ContextPtr /* context */) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Ingesting data to this type of external stream is not supported"); -} + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Ingesting data to this type of external table is not supported"); + } }; using IExternalTablePtr = std::unique_ptr; From 2385851275dc5ac594638669d6d4362270f65ba3 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Mon, 29 Jan 2024 20:42:01 -0800 Subject: [PATCH 16/26] refactor LibClient --- src/Client/LibClient.cpp | 68 ++++++++++++++++++- src/Client/LibClient.h | 7 +- .../ClickHouse/ClickHouseSink.cpp | 26 ++----- .../ExternalTable/ClickHouse/ClickHouseSink.h | 5 +- .../ClickHouse/ClickHouseSource.cpp | 31 ++++++--- .../ClickHouse/ClickHouseSource.h | 2 + 6 files changed, 101 insertions(+), 38 deletions(-) diff --git a/src/Client/LibClient.cpp b/src/Client/LibClient.cpp index e3c0cbc2af6..764667f17b5 100644 --- a/src/Client/LibClient.cpp +++ b/src/Client/LibClient.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -9,6 +10,7 @@ namespace ErrorCodes extern const int DEADLOCK_AVOIDED; extern const int TIMEOUT_EXCEEDED; extern const int UNKNOWN_PACKET_FROM_SERVER; +extern const int UNEXPECTED_PACKET_FROM_SERVER; } namespace @@ -16,7 +18,7 @@ namespace std::unique_ptr createConnection(const ConnectionParameters & parameters) { - return std::make_unique( + auto ret = std::make_unique( parameters.host, parameters.port, parameters.default_database, @@ -28,6 +30,9 @@ std::unique_ptr createConnection(const ConnectionParameters & parame "TimeplusProton", parameters.compression, parameters.security); + + ret->setCompatibleWithClickHouse(); + return ret; } size_t calculatePollInterval(const ConnectionTimeouts & timeouts) @@ -57,6 +62,9 @@ void LibClient::reset() void LibClient::executeQuery(const String & query, const String & query_id) { + assert(!has_running_query); + has_running_query = true; + reset(); int retries_left = 10; @@ -83,13 
+91,25 @@ void LibClient::executeQuery(const String & query, const String & query_id) if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left) LOG_ERROR(logger, "Got a transient error from the server, will retry ({} retries left)", retries_left); else + { + has_running_query = false; throw; + } } } } +void LibClient::executeInsertQuery(const String & query, const String & query_id) +{ + executeQuery(query, query_id); + receiveEndOfQuery(); +} + std::optional LibClient::pollData() { + if (!has_running_query) + return std::nullopt; + while (true) { Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE); @@ -116,7 +136,10 @@ std::optional LibClient::pollData() } if (!receiveAndProcessPacket()) + { + has_running_query = false; return std::nullopt; + } return std::move(next_data); } @@ -124,15 +147,21 @@ std::optional LibClient::pollData() void LibClient::cancelQuery() { + if (!has_running_query) + return; + LOG_INFO(logger, "Query was cancelled."); connection->sendCancel(); cancelled = true; + has_running_query = false; } /// Receive a part of the result, or progress info or an exception and process it. /// Returns true if one should continue receiving packets. 
bool LibClient::receiveAndProcessPacket() { + assert(has_running_query); + Packet packet = connection->receivePacket(); switch (packet.type) @@ -181,6 +210,43 @@ bool LibClient::receiveAndProcessPacket() } } +/// Process Log packets, exit when receive Exception or EndOfStream +bool LibClient::receiveEndOfQuery() +{ + while (true) + { + Packet packet = connection->receivePacket(); + + switch (packet.type) + { + case Protocol::Server::EndOfStream: + /// onEndOfStream(); + return true; + + case Protocol::Server::Exception: + server_exception.swap(packet.exception); + return false; + + case Protocol::Server::Log: + /// onLogData(packet.block); + break; + + case Protocol::Server::Progress: + /// onProgress(packet.progress); + break; + + case Protocol::Server::ProfileEvents: + /// onProfileEvents(packet.block); + break; + + default: + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER, + "Unexpected packet from server (expected Exception, EndOfStream, Log, Progress or ProfileEvents. Got {})", + String(Protocol::Server::toString(packet.type))); + } + } +} + void LibClient::throwServerExceptionIfAny() { if (server_exception) diff --git a/src/Client/LibClient.h b/src/Client/LibClient.h index 124b60558f2..43c69bb8dbf 100644 --- a/src/Client/LibClient.h +++ b/src/Client/LibClient.h @@ -14,8 +14,10 @@ class LibClient final LibClient(ConnectionParameters params_, Poco::Logger * logger_); - /// Sends the query to the server. + /// Sends the query to the server to execute. For insert queries, use `executeInsertQuery` instead. void executeQuery(const String & query, const String & query_id = ""); + // Sends an insert query to the server to execute. + void executeInsertQuery(const String & query, const String & query_id = ""); /// Cancels the currently running query, does nothing if there is no queries running. void cancelQuery(); /// Polls data for a query previously sent with `executeQuery`. When no more data are available, the returned optional will be empty. 
@@ -25,12 +27,15 @@ class LibClient final private: bool receiveAndProcessPacket(); + bool receiveEndOfQuery(); + void reset(); ConnectionParameters params; std::unique_ptr connection; size_t poll_interval; + bool has_running_query {false}; std::atomic_bool cancelled {false}; size_t processed_rows {0}; Block next_data; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index 6afa6c2d011..5286756230c 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -37,25 +37,10 @@ ClickHouseSink::ClickHouseSink( Poco::Logger * logger_) : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) , insert_into(constructInsertQuery(table, header)) - , params(params_) + , client(std::make_unique(params_, logger_)) , context(context_) , logger(logger_) { - conn = std::make_unique( - params.host, - params.port, - params.default_database, - params.user, - params.password, - params.quota_key, - "", /*cluster*/ - "", /*cluster_secret*/ - "TimeplusProton", - params.compression, - params.security); - - conn->setCompatibleWithClickHouse(); - buf = std::make_unique(); auto format_settings = getFormatSettings(context); format_settings.values.no_commas_between_rows = true; @@ -91,12 +76,9 @@ void ClickHouseSink::consume(Chunk chunk) output_format->write(block); String query_to_sent {buf->str()}; - conn->forceConnected(params.timeouts); /// The connection chould have been idle for too long - conn->sendQuery(params.timeouts, query_to_sent, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); - - LibClient client {*conn, params.timeouts, logger}; - client.receiveResult(); - client.throwServerExceptionIfAny(); + // conn->forceConnected(params.timeouts); /// The connection chould have been idle for too long + client->executeInsertQuery(query_to_sent); + client->throwServerExceptionIfAny(); } } diff --git 
a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index ed4abbc9825..72ac3d30e19 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -27,8 +27,7 @@ class ClickHouseSink final : public SinkToStorage private: String insert_into; - const ConnectionParameters & params; - std::unique_ptr conn; + std::unique_ptr client; std::unique_ptr buf; OutputFormatPtr output_format; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp index 60b11445894..eaa1b4fd29d 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp @@ -40,17 +40,26 @@ ClickHouseSource::ClickHouseSource( Chunk ClickHouseSource::generate() { - // if (isCancelled()) - // { - // } - - /// TODO re-design the client API to provide a function to poll data instead of using callbacks. 
- client->executeQuery(query, { - .on_data = [](Block & blk) - { - } - }); - return {}; + if (isCancelled()) + { + if (started) + client->cancelQuery(); + + return {}; + } + + if (!started) + { + started = true; + client->executeQuery(query); + } + + auto block = client->pollData(); + client->throwServerExceptionIfAny(); + if (!block) + return {}; + + return {block->getColumns(), block->rows()}; } } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h index 5c2b077ef48..c5feafc71fa 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h @@ -25,6 +25,8 @@ class ClickHouseSource final : public ISource Chunk generate() override; private: + bool started {false}; + std::unique_ptr client; String query; From d636d6b2e37477c1d921676cf85fb76454b4f0b1 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Mon, 29 Jan 2024 22:54:32 -0800 Subject: [PATCH 17/26] fixed compile errors --- .../ExternalTable/ClickHouse/ClickHouse.cpp | 15 ++++++++++++--- .../ExternalTable/ClickHouse/ClickHouseSource.cpp | 6 ++++++ .../ExternalTable/ClickHouse/ClickHouseSource.h | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 5ccfbbfc841..d445aadc53c 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -48,15 +48,24 @@ void ClickHouse::startup() Pipe ClickHouse::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, + SelectQueryInfo & /*query_info*/, ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, size_t /*num_streams*/) { + /// For queries like `SELECT count(*) FROM tumble(table, now(), 5s) GROUP BY window_end` don't have required column from table. 
+ /// We will need add one + Block header; + if (!column_names.empty()) + /// FIXME select 1 + header = storage_snapshot->getSampleBlockForColumns({""}); + else + header = storage_snapshot->getSampleBlockForColumns(column_names); + auto client = std::make_unique(connection_params, logger); - auto source = std::make_unique(std::move(client), column_names, processed_stage, context); - return {source}; + auto source = std::make_shared(table, header, std::move(client), processed_stage, context, logger); + return Pipe(std::move(source)); } SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp index eaa1b4fd29d..389d7589255 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp @@ -28,6 +28,7 @@ ClickHouseSource::ClickHouseSource( const String & table, const Block & header, std::unique_ptr client_, + QueryProcessingStage::Enum /*processed_stage*/, ContextPtr context_, Poco::Logger * logger_) : ISource(header, true, ProcessorID::ClickHouseSourceID) @@ -54,11 +55,16 @@ Chunk ClickHouseSource::generate() client->executeQuery(query); } + LOG_INFO(logger, "polling data"); auto block = client->pollData(); client->throwServerExceptionIfAny(); if (!block) + { + LOG_INFO(logger, "no more data"); return {}; + } + LOG_INFO(logger, "received {} rows", block->rows()); return {block->getColumns(), block->rows()}; } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h index c5feafc71fa..d5a3719a819 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h @@ -16,6 +16,7 @@ class ClickHouseSource final : public ISource const String & table, const Block & 
header, std::unique_ptr client_, + QueryProcessingStage::Enum processed_stage, ContextPtr context_, Poco::Logger * logger_); From 45360be7030b29eb0ee8f268d657ba33498ffe9f Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Tue, 30 Jan 2024 00:29:51 -0800 Subject: [PATCH 18/26] fixed source --- .../ExternalTable/ClickHouse/ClickHouse.cpp | 2 +- .../ExternalTable/StorageExternalTable.cpp | 12 ++++++++++++ .../ExternalTable/StorageExternalTable.h | 19 +++++++++---------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index d445aadc53c..0c8db7be573 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -57,7 +57,7 @@ Pipe ClickHouse::read( /// For queries like `SELECT count(*) FROM tumble(table, now(), 5s) GROUP BY window_end` don't have required column from table. /// We will need add one Block header; - if (!column_names.empty()) + if (column_names.empty()) /// FIXME select 1 header = storage_snapshot->getSampleBlockForColumns({""}); else diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index a3785e21fb6..450cb03ca49 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -24,6 +24,18 @@ StorageExternalTable::StorageExternalTable( setStorageMetadata(args); } +Pipe StorageExternalTable::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context_, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) +{ + return external_table->read(column_names, storage_snapshot, query_info, context_, processed_stage, max_block_size, num_streams); +} + SinkToStoragePtr StorageExternalTable::write( const ASTPtr & query, const StorageMetadataPtr 
& metadata_snapshot, diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index 4e5f4ea24c4..7ece55a72bf 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -25,6 +25,15 @@ class StorageExternalTable final : public shared_ptr_helperstartup(); } void shutdown() override { external_table->shutdown(); } + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context_, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + SinkToStoragePtr write( const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, @@ -37,16 +46,6 @@ class StorageExternalTable final : public shared_ptr_helper Date: Tue, 30 Jan 2024 03:21:43 -0800 Subject: [PATCH 19/26] bug fixes and cleanup --- src/Client/LibClient.cpp | 42 +++++++++++++------ src/Client/LibClient.h | 3 ++ .../ExternalTable/ClickHouse/ClickHouse.cpp | 35 +++------------- 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/src/Client/LibClient.cpp b/src/Client/LibClient.cpp index 764667f17b5..8f6d91b8e9a 100644 --- a/src/Client/LibClient.cpp +++ b/src/Client/LibClient.cpp @@ -67,8 +67,7 @@ void LibClient::executeQuery(const String & query, const String & query_id) reset(); - int retries_left = 10; - while (retries_left) + while (true) { try { @@ -86,10 +85,17 @@ void LibClient::executeQuery(const String & query, const String & query_id) } catch (const Exception & e) { - /// Retry when the server said "Client should retry" and no rows - /// has been received yet. 
- if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left) - LOG_ERROR(logger, "Got a transient error from the server, will retry ({} retries left)", retries_left); + /// connection lost + if (!connection->checkConnected()) + { + LOG_ERROR(logger, "ClickHouse connection lost"); + /// set the connection not connected so that sendQuery will reconnect + connection->disconnect(); + } + + /// Retry when the server said "Client should retry" and no rows has been received yet. + if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED) + LOG_ERROR(logger, "Got a transient error from the server, will retry"); else { has_running_query = false; @@ -189,17 +195,18 @@ bool LibClient::receiveAndProcessPacket() // on_extremes(packet.block); return true; + case Protocol::Server::EndOfStream: + onEndOfStream(); + return true; + case Protocol::Server::Exception: - server_exception.swap(packet.exception); + onServerException(std::move(packet.exception)); return false; case Protocol::Server::Log: /// on_server_log(packet.block); return true; - case Protocol::Server::EndOfStream: - return false; - case Protocol::Server::ProfileEvents: /// on_profile_event(packet.block); return true; @@ -220,11 +227,11 @@ bool LibClient::receiveEndOfQuery() switch (packet.type) { case Protocol::Server::EndOfStream: - /// onEndOfStream(); + onEndOfStream(); return true; case Protocol::Server::Exception: - server_exception.swap(packet.exception); + onServerException(std::move(packet.exception)); return false; case Protocol::Server::Log: @@ -247,6 +254,17 @@ bool LibClient::receiveEndOfQuery() } } +void LibClient::onEndOfStream() +{ + has_running_query = false; +} + +void LibClient::onServerException(std::unique_ptr && exception) +{ + server_exception.swap(exception); + has_running_query = false; +} + void LibClient::throwServerExceptionIfAny() { if (server_exception) diff --git a/src/Client/LibClient.h b/src/Client/LibClient.h index 43c69bb8dbf..0b399ba99dc 100644 
--- a/src/Client/LibClient.h +++ b/src/Client/LibClient.h @@ -31,6 +31,9 @@ class LibClient final void reset(); + void onEndOfStream(); + void onServerException(std::unique_ptr && exception); + ConnectionParameters params; std::unique_ptr connection; size_t poll_interval; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 0c8db7be573..d5b0e2100c3 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -32,11 +32,12 @@ ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings, C connection_params.password = settings->password.value; connection_params.default_database = settings->database.value; connection_params.security = settings->secure.value ? Protocol::Secure::Enable : Protocol::Secure::Disable; + /// TODO read from settings connection_params.timeouts = { /*connection_timeout_=*/ 1 * 60 * 1'000'000, /*send_timeout_=*/ 1 * 60 * 1'000'000, /*receive_timeout_=*/ 1 * 60 * 1'000'000, - /*tcp_keep_alive_timeout_=*/ 10 * 60 * 1'000'000 + /*tcp_keep_alive_timeout_=*/ 5 * 60 * 1'000'000 }; } @@ -54,17 +55,9 @@ Pipe ClickHouse::read( size_t /*max_block_size*/, size_t /*num_streams*/) { - /// For queries like `SELECT count(*) FROM tumble(table, now(), 5s) GROUP BY window_end` don't have required column from table. 
- /// We will need add one - Block header; - if (column_names.empty()) - /// FIXME select 1 - header = storage_snapshot->getSampleBlockForColumns({""}); - else - header = storage_snapshot->getSampleBlockForColumns(column_names); - + auto header = storage_snapshot->getSampleBlockForColumns(column_names); auto client = std::make_unique(connection_params, logger); - auto source = std::make_shared(table, header, std::move(client), processed_stage, context, logger); + auto source = std::make_shared(table, std::move(header), std::move(client), processed_stage, context, logger); return Pipe(std::move(source)); } @@ -75,29 +68,11 @@ SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetad ColumnsDescription ClickHouse::getTableStructure() { - // auto conn = std::make_unique( - // connection_params.host, - // connection_params.port, - // connection_params.default_database, - // connection_params.user, - // connection_params.password, - // connection_params.quota_key, - // "", /*cluster*/ - // "", /*cluster_secret*/ - // "TimeplusProton", - // connection_params.compression, - // connection_params.security); - // - // conn->setCompatibleWithClickHouse(); - - // LOG_INFO(logger, "DESCRIBE TABLE {}", table); - // conn->sendQuery(connection_params.timeouts, "DESCRIBE TABLE " + table, {}, "", QueryProcessingStage::Complete, nullptr, nullptr, false); - ColumnsDescription ret {}; LibClient client {connection_params, logger}; - LOG_INFO(logger, "DESCRIBE TABLE {}", table); client.executeQuery("DESCRIBE TABLE " + table); + LOG_INFO(logger, "Receiving table schema"); while (true) { const auto & block = client.pollData(); From 9070efe3f05d56199846b3baa819b850eecc2371 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Tue, 30 Jan 2024 21:31:37 -0800 Subject: [PATCH 20/26] renamed LibClient to ClickHouseClient --- .../{LibClient.cpp => ClickHouseClient.cpp} | 24 +++++++++---------- .../{LibClient.h => ClickHouseClient.h} | 6 ++--- 
.../ExternalTable/ClickHouse/ClickHouse.cpp | 6 ++--- .../ClickHouse/ClickHouseSink.cpp | 4 ++-- .../ExternalTable/ClickHouse/ClickHouseSink.h | 4 ++-- .../ClickHouse/ClickHouseSource.cpp | 2 +- .../ClickHouse/ClickHouseSource.h | 6 ++--- 7 files changed, 26 insertions(+), 26 deletions(-) rename src/Client/{LibClient.cpp => ClickHouseClient.cpp} (90%) rename src/Client/{LibClient.h => ClickHouseClient.h} (84%) diff --git a/src/Client/LibClient.cpp b/src/Client/ClickHouseClient.cpp similarity index 90% rename from src/Client/LibClient.cpp rename to src/Client/ClickHouseClient.cpp index 8f6d91b8e9a..33ed4481d67 100644 --- a/src/Client/LibClient.cpp +++ b/src/Client/ClickHouseClient.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { @@ -45,7 +45,7 @@ size_t calculatePollInterval(const ConnectionTimeouts & timeouts) } -LibClient::LibClient(ConnectionParameters params_, Poco::Logger * logger_) +ClickHouseClient::ClickHouseClient(ConnectionParameters params_, Poco::Logger * logger_) : params(params_) , connection(createConnection(params)) , poll_interval(calculatePollInterval(params.timeouts)) @@ -53,14 +53,14 @@ LibClient::LibClient(ConnectionParameters params_, Poco::Logger * logger_) { } -void LibClient::reset() +void ClickHouseClient::reset() { cancelled = false; processed_rows = 0; server_exception = nullptr; } -void LibClient::executeQuery(const String & query, const String & query_id) +void ClickHouseClient::executeQuery(const String & query, const String & query_id) { assert(!has_running_query); has_running_query = true; @@ -105,13 +105,13 @@ void LibClient::executeQuery(const String & query, const String & query_id) } } -void LibClient::executeInsertQuery(const String & query, const String & query_id) +void ClickHouseClient::executeInsertQuery(const String & query, const String & query_id) { executeQuery(query, query_id); receiveEndOfQuery(); } -std::optional LibClient::pollData() +std::optional ClickHouseClient::pollData() { if 
(!has_running_query) return std::nullopt; @@ -151,7 +151,7 @@ std::optional LibClient::pollData() } } -void LibClient::cancelQuery() +void ClickHouseClient::cancelQuery() { if (!has_running_query) return; @@ -164,7 +164,7 @@ void LibClient::cancelQuery() /// Receive a part of the result, or progress info or an exception and process it. /// Returns true if one should continue receiving packets. -bool LibClient::receiveAndProcessPacket() +bool ClickHouseClient::receiveAndProcessPacket() { assert(has_running_query); @@ -218,7 +218,7 @@ bool LibClient::receiveAndProcessPacket() } /// Process Log packets, exit when receive Exception or EndOfStream -bool LibClient::receiveEndOfQuery() +bool ClickHouseClient::receiveEndOfQuery() { while (true) { @@ -254,18 +254,18 @@ bool LibClient::receiveEndOfQuery() } } -void LibClient::onEndOfStream() +void ClickHouseClient::onEndOfStream() { has_running_query = false; } -void LibClient::onServerException(std::unique_ptr && exception) +void ClickHouseClient::onServerException(std::unique_ptr && exception) { server_exception.swap(exception); has_running_query = false; } -void LibClient::throwServerExceptionIfAny() +void ClickHouseClient::throwServerExceptionIfAny() { if (server_exception) server_exception->rethrow(); diff --git a/src/Client/LibClient.h b/src/Client/ClickHouseClient.h similarity index 84% rename from src/Client/LibClient.h rename to src/Client/ClickHouseClient.h index 0b399ba99dc..4ca02818363 100644 --- a/src/Client/LibClient.h +++ b/src/Client/ClickHouseClient.h @@ -6,13 +6,13 @@ namespace DB { -/// LibClient is for using as a library client without all the complexities for handling terminal stuff like ClientBase does. +/// ClickHouseClient is a client that is compatiable with the ClickHouse protocol and can be used to talk to ClickHouse servers. /// This is not thread-safe. 
-class LibClient final +class ClickHouseClient final { public: - LibClient(ConnectionParameters params_, Poco::Logger * logger_); + ClickHouseClient(ConnectionParameters params_, Poco::Logger * logger_); /// Sends the query to the server to execute. For insert queries, use `executeInsertQuery` instead. void executeQuery(const String & query, const String & query_id = ""); diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index d5b0e2100c3..554b464197b 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -56,7 +56,7 @@ Pipe ClickHouse::read( size_t /*num_streams*/) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - auto client = std::make_unique(connection_params, logger); + auto client = std::make_unique(connection_params, logger); auto source = std::make_shared(table, std::move(header), std::move(client), processed_stage, context, logger); return Pipe(std::move(source)); } @@ -70,7 +70,7 @@ ColumnsDescription ClickHouse::getTableStructure() { ColumnsDescription ret {}; - LibClient client {connection_params, logger}; + ClickHouseClient client {connection_params, logger}; client.executeQuery("DESCRIBE TABLE " + table); LOG_INFO(logger, "Receiving table schema"); while (true) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index 5286756230c..c756d717694 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -1,5 +1,5 @@ +#include #include -#include #include #include #include @@ -37,7 +37,7 @@ ClickHouseSink::ClickHouseSink( Poco::Logger * logger_) : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) , insert_into(constructInsertQuery(table, header)) - , 
client(std::make_unique(params_, logger_)) + , client(std::make_unique(params_, logger_)) , context(context_) , logger(logger_) { diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index 72ac3d30e19..9ec7bdb15bc 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -27,7 +27,7 @@ class ClickHouseSink final : public SinkToStorage private: String insert_into; - std::unique_ptr client; + std::unique_ptr client; std::unique_ptr buf; OutputFormatPtr output_format; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp index 389d7589255..9718d09d59c 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp @@ -27,7 +27,7 @@ String constructSelectQuery(const String & table, const Block & header) ClickHouseSource::ClickHouseSource( const String & table, const Block & header, - std::unique_ptr client_, + std::unique_ptr client_, QueryProcessingStage::Enum /*processed_stage*/, ContextPtr context_, Poco::Logger * logger_) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h index d5a3719a819..0f0f7d898f3 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB @@ -15,7 +15,7 @@ class ClickHouseSource final : public ISource ClickHouseSource( const String & table, const Block & header, - std::unique_ptr client_, + std::unique_ptr client_, QueryProcessingStage::Enum processed_stage, ContextPtr context_, Poco::Logger * logger_); @@ -28,7 +28,7 @@ class ClickHouseSource final : public ISource 
private: bool started {false}; - std::unique_ptr client; + std::unique_ptr client; String query; ContextPtr context; From 3df5aba883db2490202f1e8fa6c4ae923138ffda Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Wed, 31 Jan 2024 02:09:49 -0800 Subject: [PATCH 21/26] removed unused code --- src/Client/Connection.cpp | 6 ----- src/Common/IFactoryWithAliases.h | 11 --------- src/DataTypes/DataTypeFactory.cpp | 40 ------------------------------- src/DataTypes/DataTypeFactory.h | 1 - src/Formats/NativeWriter.cpp | 6 ----- src/Formats/NativeWriter.h | 8 ------- 6 files changed, 72 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 6576b2c13b7..d261cf45017 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -589,10 +589,6 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) maybe_compressed_out = out; block_out = std::make_unique(*maybe_compressed_out, block.cloneEmpty(), server_revision); - /// proton: starts - if (compatible_with_clickhouse) - block_out->setCompatibleWithClickHouse(); - /// proton: ends } if (scalar) @@ -1110,8 +1106,6 @@ void Connection::setCompatibleWithClickHouse() block_logs_in->setCompatibleWithClickHouse(); if (block_profile_events_in) block_profile_events_in->setCompatibleWithClickHouse(); - if (block_out) - block_out->setCompatibleWithClickHouse(); } /// proton: ends diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index 791c4fd27e9..cdb9100a6f5 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -43,14 +43,6 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> else return name; } - - String getClickHouseAliasFromOrName(const String & name) const - { - if (reversed_clickhouse_names.contains(name)) - return reversed_clickhouse_names.at(name); - else - return name; - } /// proton: ends std::unordered_map case_insensitive_name_mapping; @@ -122,7 +114,6 @@ class IFactoryWithAliases 
: public IHints<2, IFactoryWithAliases> if (!clickhouse_names.emplace(alias_name, real_dict_name).second) throw Exception(factory_name + ": ClickHouse alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); - reversed_clickhouse_names.emplace(real_dict_name, alias_name); } /// proton: ends @@ -193,8 +184,6 @@ class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> /// proton: starts /// ClickHouse names map to data_types from previous two maps AliasMap clickhouse_names; - /// For looking up Proton type names from ClickHouse names - AliasMap reversed_clickhouse_names; /// proton: ends }; diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index ace01ffef18..6fd3161350b 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -32,46 +32,6 @@ DataTypePtr DataTypeFactory::get(TypeIndex type) const { return get(typeIndexToTypeName(type)); } - -String DataTypeFactory::getClickHouseNameFromName(const String & name) const -{ - /// Data type parser can be invoked from coroutines with small stack. - /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers) - /// let's make the threshold significantly lower. - /// It is impractical for user to have complex data types with this depth. 
- -#if defined(SANITIZER) || !defined(NDEBUG) - static constexpr size_t data_type_max_parse_depth = 150; -#else - static constexpr size_t data_type_max_parse_depth = 300; -#endif - - ParserDataType parser; - ASTPtr ast = parseQuery(parser, name.data(), name.data() + name.size(), "data type", 0, data_type_max_parse_depth); - if (const auto * func = ast->as()) - { - if (func->parameters) - throw Exception("Data type cannot have multiple parenthesized parameters.", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE); - - if (func->arguments) - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type with arguments is not supported yet, got {}.", func->name); - - return getClickHouseAliasFromOrName(func->name); - } - - if (const auto * ident = ast->as()) - { - return getClickHouseAliasFromOrName(ident->name()); - } - - if (const auto * lit = ast->as()) - { - if (lit->value.isNull()) - return "Null"; - } - - throw Exception("Unexpected AST element for data type.", ErrorCodes::UNEXPECTED_AST_STRUCTURE); -} /// proton: ends. DataTypePtr DataTypeFactory::get(const String & full_name/* proton: starts*/, bool compatible_with_clickhouse/* proton: ends*/) const diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 4d349a4731c..126edc10ec3 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -33,7 +33,6 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli /// proton: starts. DataTypePtr get(TypeIndex type) const; - String getClickHouseNameFromName(const String & name) const; /// proton: ends. 
DataTypePtr get(const String & full_name/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 381e8a19778..0afa1a4b625 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -91,7 +91,6 @@ void NativeWriter::write(const Block & block) index_block.columns.resize(columns); } - const auto & factory = DataTypeFactory::instance(); for (size_t i = 0; i < columns; ++i) { /// For the index. @@ -112,11 +111,6 @@ void NativeWriter::write(const Block & block) /// Type String type_name = column.type->getName(); - /// proton: starts - if (compatible_with_clickhouse) - type_name = factory.getClickHouseNameFromName(type_name); - /// proton: ends - writeStringBinary(type_name, ostr); setVersionToAggregateFunctions(column.type, true, client_revision); diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index d28104ccb8b..6631d2c42b5 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -32,10 +32,6 @@ class NativeWriter static String getContentType() { return "application/octet-stream"; } - /// proton: starts - void setCompatibleWithClickHouse() { compatible_with_clickhouse = true; } - /// proton: end - private: WriteBuffer & ostr; Block header; @@ -44,10 +40,6 @@ class NativeWriter size_t initial_size_of_file; /// The initial size of the data file, if `append` done. Used for the index. /// If you need to write index, then `ostr` must be a CompressedWriteBuffer. 
CompressedWriteBuffer * ostr_concrete = nullptr; - - /// proton: starts - bool compatible_with_clickhouse {false}; - /// proton: ends }; } From 10c98b54a0229fbe0589dac3a1d2e7d045311f8e Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Wed, 31 Jan 2024 18:44:09 -0800 Subject: [PATCH 22/26] cleanup --- src/Client/ClickHouseClient.cpp | 24 ++++++++++--------- src/Client/ClickHouseClient.h | 18 +++++++++----- src/Client/Connection.cpp | 3 ++- src/DataTypes/DataTypeAggregateFunction.cpp | 4 +++- src/DataTypes/DataTypeArray.cpp | 4 +++- src/DataTypes/DataTypeCustomGeo.cpp | 7 ++++++ .../DataTypeCustomSimpleAggregateFunction.cpp | 6 +++-- src/DataTypes/DataTypeDate.cpp | 2 ++ src/DataTypes/DataTypeDate32.cpp | 2 ++ src/DataTypes/DataTypeDomainBool.cpp | 2 ++ src/DataTypes/DataTypeEnum.cpp | 8 ++++--- src/DataTypes/DataTypeFactory.h | 1 - src/DataTypes/DataTypeFixedString.cpp | 4 +++- src/DataTypes/DataTypeIPv4andIPv6.cpp | 2 ++ src/DataTypes/DataTypeInterval.cpp | 2 ++ src/DataTypes/DataTypeLowCardinality.cpp | 4 +++- src/DataTypes/DataTypeMap.cpp | 4 +++- src/DataTypes/DataTypeNested.cpp | 4 +++- src/DataTypes/DataTypeNothing.cpp | 2 ++ src/DataTypes/DataTypeNullable.cpp | 4 +++- src/DataTypes/DataTypeObject.cpp | 2 ++ src/DataTypes/DataTypeString.cpp | 5 ++-- src/DataTypes/DataTypeTuple.cpp | 6 +++-- src/DataTypes/DataTypeUUID.cpp | 2 ++ src/DataTypes/DataTypesDecimal.cpp | 6 +++-- src/DataTypes/DataTypesNumber.cpp | 4 +++- src/DataTypes/IDataTypeTranslator.h | 14 ----------- src/DataTypes/registerDataTypeDateTime.cpp | 8 ++++--- src/Formats/NativeWriter.cpp | 1 - .../ExternalTable/ClickHouse/ClickHouse.cpp | 11 ++++++++- .../ExternalTable/ClickHouse/ClickHouse.h | 4 ++-- .../ExternalTable/ExternalTableFactory.cpp | 14 ++++++++--- .../ExternalTable/ExternalTableFactory.h | 10 ++++---- .../{ExternalTableImpl.h => IExternalTable.h} | 0 .../ExternalTable/StorageExternalTable.cpp | 10 ++------ .../ExternalTable/StorageExternalTable.h | 2 +- 36 files changed, 131 
insertions(+), 75 deletions(-) delete mode 100644 src/DataTypes/IDataTypeTranslator.h rename src/Storages/ExternalTable/{ExternalTableImpl.h => IExternalTable.h} (100%) diff --git a/src/Client/ClickHouseClient.cpp b/src/Client/ClickHouseClient.cpp index 33ed4481d67..a73840d1399 100644 --- a/src/Client/ClickHouseClient.cpp +++ b/src/Client/ClickHouseClient.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace DB @@ -74,11 +73,11 @@ void ClickHouseClient::executeQuery(const String & query, const String & query_i connection->sendQuery( params.timeouts, query, - {}, + {} /*query_parameters*/, query_id, QueryProcessingStage::Complete, - nullptr, - nullptr, + nullptr /*settings*/, + nullptr /*client_info*/, false); break; @@ -88,14 +87,16 @@ void ClickHouseClient::executeQuery(const String & query, const String & query_i /// connection lost if (!connection->checkConnected()) { - LOG_ERROR(logger, "ClickHouse connection lost"); + LOG_ERROR(logger, "Connection lost"); /// set the connection not connected so that sendQuery will reconnect connection->disconnect(); } - /// Retry when the server said "Client should retry" and no rows has been received yet. 
- if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED) - LOG_ERROR(logger, "Got a transient error from the server, will retry"); + else if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED) + { + LOG_ERROR(logger, "Got a transient error from the server, will retry in 1 second"); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } else { has_running_query = false; @@ -147,7 +148,7 @@ std::optional ClickHouseClient::pollData() return std::nullopt; } - return std::move(next_data); + return std::move(polled_data); } } @@ -156,7 +157,7 @@ void ClickHouseClient::cancelQuery() if (!has_running_query) return; - LOG_INFO(logger, "Query was cancelled."); + LOG_INFO(logger, "Query cancelled."); connection->sendCancel(); cancelled = true; has_running_query = false; @@ -176,7 +177,8 @@ bool ClickHouseClient::receiveAndProcessPacket() return true; case Protocol::Server::Data: - next_data = std::move(packet.block); + processed_rows += packet.block.rows(); + polled_data = std::move(packet.block); return true; case Protocol::Server::Progress: diff --git a/src/Client/ClickHouseClient.h b/src/Client/ClickHouseClient.h index 4ca02818363..1db3260217c 100644 --- a/src/Client/ClickHouseClient.h +++ b/src/Client/ClickHouseClient.h @@ -7,7 +7,9 @@ namespace DB { /// ClickHouseClient is a client that is compatiable with the ClickHouse protocol and can be used to talk to ClickHouse servers. -/// This is not thread-safe. +/// Note: +/// * This client is designed for the ClickHouse ExternalTable, so it's not 100% compatible with the ClickHouse protocol; it just needs to make sure the ExternalTable is functional. +/// * A client object should not be shared with multiple threads. class ClickHouseClient final { public: @@ -15,14 +17,18 @@ class ClickHouseClient final ClickHouseClient(ConnectionParameters params_, Poco::Logger * logger_); /// Sends the query to the server to execute. For insert queries, use `executeInsertQuery` instead.
+ /// Make sure to keep calling the `pollData` method until it returns an empty optional; until then, the + /// client won't be able to execute another query. void executeQuery(const String & query, const String & query_id = ""); - // Sends an insert query to the server to execute. + /// Sends an insert query to the server to execute. The difference between this and executeQuery is that, + /// after calling this method, there is no need to call the `pollData` method. void executeInsertQuery(const String & query, const String & query_id = ""); /// Cancels the currently running query, does nothing if there is no queries running. void cancelQuery(); - /// Polls data for a query previously sent with `executeQuery`. When no more data are available, the returned optional will be empty. + /// Polls data for a query previously sent with `executeQuery`. When no more data are available, + /// the returned optional will be empty. std::optional pollData(); - + /// Throw the server exception received from the ClickHouse server if any (during `pollData` or `executeInsertQuery`).
void throwServerExceptionIfAny(); private: @@ -39,9 +45,9 @@ class ClickHouseClient final size_t poll_interval; bool has_running_query {false}; - std::atomic_bool cancelled {false}; + bool cancelled {false}; size_t processed_rows {0}; - Block next_data; + Block polled_data; std::unique_ptr server_exception {nullptr}; Poco::Logger * logger; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index d261cf45017..b2948c29509 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1000,7 +1000,8 @@ void Connection::initBlockLogsInput() /// Have to return superset of SystemLogsQueue::getSampleBlock() columns block_logs_in = std::make_unique(*in, server_revision); /// proton: starts - if (compatible_with_clickhouse) block_logs_in->setCompatibleWithClickHouse(); + if (compatible_with_clickhouse) + block_logs_in->setCompatibleWithClickHouse(); /// proton: ends } } diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 87119cc4536..25a6a04c0a6 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -228,7 +228,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com ErrorCodes::BAD_ARGUMENTS); for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i) - argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i], compatible_with_clickhouse)); + argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); if (function_name.empty()) throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); @@ -260,7 +260,9 @@ void registerDataTypeAggregateFunction(DataTypeFactory & factory) { factory.registerDataType("aggregate_function", create); + /// proton: starts factory.registerClickHouseAlias("AggregateFunction", "aggregate_function"); + /// proton: 
ends } } diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 5a95dff04ff..9ed7c1fd6ed 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -64,7 +64,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com if (!arguments || arguments->children.size() != 1) throw Exception("array data type family must have exactly one argument - type of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - return std::make_shared(DataTypeFactory::instance().get(arguments->children[0], compatible_with_clickhouse)); + return std::make_shared(DataTypeFactory::instance().get(arguments->children[0]/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); } @@ -72,7 +72,9 @@ void registerDataTypeArray(DataTypeFactory & factory) { factory.registerDataType("array", create); + /// proton: starts factory.registerClickHouseAlias("Array", "array"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeCustomGeo.cpp b/src/DataTypes/DataTypeCustomGeo.cpp index 23c4086267e..98059d7c76f 100644 --- a/src/DataTypes/DataTypeCustomGeo.cpp +++ b/src/DataTypes/DataTypeCustomGeo.cpp @@ -38,6 +38,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory) return std::make_pair(DataTypeFactory::instance().get("array(polygon)"), std::make_unique(std::make_unique())); }); + + /// proton: starts + factory.registerClickHouseAlias("Point", "point"); + factory.registerClickHouseAlias("Ring", "ring"); + factory.registerClickHouseAlias("Polygon", "polygon"); + factory.registerClickHouseAlias("MultiPolygon", "multi_polygon"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index e9552c9049d..26bd21c1b8b 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -119,7 +119,7 @@ static std::pair create(const ASTPtr & argum 
ErrorCodes::BAD_ARGUMENTS); for (size_t i = 1; i < arguments->children.size(); ++i) - argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i], compatible_with_clickhouse)); + argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); if (function_name.empty()) throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); @@ -129,7 +129,7 @@ static std::pair create(const ASTPtr & argum DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function); - DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName(), compatible_with_clickhouse); + DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName()/* proton: starts */, compatible_with_clickhouse/* proton: ends */); if (!function->getReturnType()->equals(*removeLowCardinality(storage_type))) { @@ -146,7 +146,9 @@ void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) { factory.registerDataTypeCustom("simple_aggregate_function", create); + /// proton: starts factory.registerClickHouseAlias("SimpleAggregateFunction", "simple_aggregate_function"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp index e7203536181..08befbaaf49 100644 --- a/src/DataTypes/DataTypeDate.cpp +++ b/src/DataTypes/DataTypeDate.cpp @@ -19,7 +19,9 @@ void registerDataTypeDate(DataTypeFactory & factory) { factory.registerSimpleDataType("date", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("Date", "date"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp index dc00020377d..174e7e533df 100644 --- a/src/DataTypes/DataTypeDate32.cpp +++ b/src/DataTypes/DataTypeDate32.cpp @@ -19,7 +19,9 @@ void 
registerDataTypeDate32(DataTypeFactory & factory) factory.registerSimpleDataType( "date32", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("Date32", "date32"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeDomainBool.cpp b/src/DataTypes/DataTypeDomainBool.cpp index 89a4c428e54..8662a07bba5 100644 --- a/src/DataTypes/DataTypeDomainBool.cpp +++ b/src/DataTypes/DataTypeDomainBool.cpp @@ -16,7 +16,9 @@ void registerDataTypeDomainBool(DataTypeFactory & factory) factory.registerAlias("boolean", "bool"); + /// proton: starts factory.registerClickHouseAlias("Bool", "bool"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 1c371520124..69a94b68e0b 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -239,7 +239,7 @@ static void autoAssignNumberForEnum(const ASTPtr & arguments) } template -static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.empty()) throw Exception("Data type enum cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); @@ -301,10 +301,10 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com Int64 value = value_literal->value.get(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) - return createExact(arguments, compatible_with_clickhouse); + return createExact(arguments/* proton: starts */, compatible_with_clickhouse/* proton: ends */); } - return createExact(arguments, compatible_with_clickhouse); + return createExact(arguments/* proton: starts */, compatible_with_clickhouse/* proton: ends */); } void 
registerDataTypeEnum(DataTypeFactory & factory) @@ -316,9 +316,11 @@ void registerDataTypeEnum(DataTypeFactory & factory) /// MySQL /// factory.registerAlias("ENUM", "enum", DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("Enum8", "enum8"); factory.registerClickHouseAlias("Enum16", "enum16"); factory.registerClickHouseAlias("Enum", "enum"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 126edc10ec3..645c9633217 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -67,7 +67,6 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli const DataTypesDictionary & getCaseInsensitiveMap() const override { return case_insensitive_data_types; } String getFactoryName() const override { return "DataTypeFactory"; } - }; void registerDataTypeNumbers(DataTypeFactory & factory); diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 54f9a493d58..d6d95a48e7f 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -44,7 +44,7 @@ SerializationPtr DataTypeFixedString::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.size() != 1) throw Exception("The fixed_string data type family must have exactly one argument - size in bytes", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -61,7 +61,9 @@ void registerDataTypeFixedString(DataTypeFactory & factory) { factory.registerDataType("fixed_string", create); + /// proton: starts factory.registerClickHouseAlias("FixedString", "fixed_string"); + /// proton: ends } } diff --git 
a/src/DataTypes/DataTypeIPv4andIPv6.cpp b/src/DataTypes/DataTypeIPv4andIPv6.cpp index d6d6937e4ae..c73afc245f1 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.cpp +++ b/src/DataTypes/DataTypeIPv4andIPv6.cpp @@ -13,10 +13,12 @@ void registerDataTypeIPv4andIPv6(DataTypeFactory & factory) factory.registerSimpleDataType("ipv6", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); factory.registerAlias("inet6", "ipv6", DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("IPv4", "ipv4"); factory.registerClickHouseAlias("INET4", "inet4"); factory.registerClickHouseAlias("IPv6", "ipv6"); factory.registerClickHouseAlias("INET6", "inet6"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeInterval.cpp b/src/DataTypes/DataTypeInterval.cpp index f288567eaaf..1a1717722ab 100644 --- a/src/DataTypes/DataTypeInterval.cpp +++ b/src/DataTypes/DataTypeInterval.cpp @@ -25,6 +25,7 @@ void registerDataTypeInterval(DataTypeFactory & factory) factory.registerSimpleDataType("interval_quarter", [] { return DataTypePtr(std::make_shared(IntervalKind::Quarter)); }); factory.registerSimpleDataType("interval_year", [] { return DataTypePtr(std::make_shared(IntervalKind::Year)); }); + /// proton: starts factory.registerClickHouseAlias("IntervalNanosecond", "interval_nanosecond"); factory.registerClickHouseAlias("IntervalMicrosecond", "interval_microsecond"); factory.registerClickHouseAlias("IntervalMillisecond", "interval_millisecond"); @@ -36,6 +37,7 @@ void registerDataTypeInterval(DataTypeFactory & factory) factory.registerClickHouseAlias("IntervalMonth", "interval_month"); factory.registerClickHouseAlias("IntervalQuarter", "interval_quarter"); factory.registerClickHouseAlias("IntervalYear", "interval_year"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index aeb788107ce..34cc83e5c49 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ 
b/src/DataTypes/DataTypeLowCardinality.cpp @@ -156,14 +156,16 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com throw Exception("The low_cardinality data type family must have single argument - type of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - return std::make_shared(DataTypeFactory::instance().get(arguments->children[0], compatible_with_clickhouse)); + return std::make_shared(DataTypeFactory::instance().get(arguments->children[0]/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); } void registerDataTypeLowCardinality(DataTypeFactory & factory) { factory.registerDataType("low_cardinality", create); + /// proton: starts factory.registerClickHouseAlias("LowCardinality", "low_cardinality"); + /// proton: ends } diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index fd4aa700f97..f549471fa09 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -136,7 +136,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com nested_types.reserve(arguments->children.size()); for (const ASTPtr & child : arguments->children) - nested_types.emplace_back(DataTypeFactory::instance().get(child, compatible_with_clickhouse)); + nested_types.emplace_back(DataTypeFactory::instance().get(child/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); return std::make_shared(nested_types); } @@ -146,6 +146,8 @@ void registerDataTypeMap(DataTypeFactory & factory) { factory.registerDataType("map", create); + /// proton: starts factory.registerClickHouseAlias("Map", "map"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeNested.cpp b/src/DataTypes/DataTypeNested.cpp index 46a6d708d11..86b4e1588fd 100644 --- a/src/DataTypes/DataTypeNested.cpp +++ b/src/DataTypes/DataTypeNested.cpp @@ -48,7 +48,7 @@ static std::pair create(const ASTPtr & argum if (!name_type) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type Nested accepts 
only pairs with name and type"); - auto nested_type = DataTypeFactory::instance().get(name_type->type, compatible_with_clickhouse); + auto nested_type = DataTypeFactory::instance().get(name_type->type/* proton: starts */, compatible_with_clickhouse/* proton: ends */); nested_types.push_back(std::move(nested_type)); nested_names.push_back(name_type->name); } @@ -63,7 +63,9 @@ void registerDataTypeNested(DataTypeFactory & factory) { factory.registerDataTypeCustom("nested", create); + /// proton: starts factory.registerClickHouseAlias("Nested", "nested"); + /// proton: ends } DataTypePtr createNested(const DataTypes & types, const Names & names) diff --git a/src/DataTypes/DataTypeNothing.cpp b/src/DataTypes/DataTypeNothing.cpp index afc36b1f529..180f55846c5 100644 --- a/src/DataTypes/DataTypeNothing.cpp +++ b/src/DataTypes/DataTypeNothing.cpp @@ -27,7 +27,9 @@ void registerDataTypeNothing(DataTypeFactory & factory) { factory.registerSimpleDataType("nothing", [] { return DataTypePtr(std::make_shared()); }); + /// proton: starts factory.registerClickHouseAlias("Nothing", "nothing"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index a598d04e87d..439b10bc9cc 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -66,7 +66,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com if (!arguments || arguments->children.size() != 1) throw Exception("Nullable data type family must have exactly one argument - nested type", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypePtr nested_type = DataTypeFactory::instance().get(arguments->children[0], compatible_with_clickhouse); + DataTypePtr nested_type = DataTypeFactory::instance().get(arguments->children[0]/* proton: starts */, compatible_with_clickhouse/* proton: ends */); return std::make_shared(nested_type); } @@ -76,7 +76,9 @@ void registerDataTypeNullable(DataTypeFactory & factory) { 
factory.registerDataType("nullable", create); + /// proton: starts factory.registerClickHouseAlias("Nullable", "nullable"); + /// proton: ends } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 7eaa8997a01..9d43a911fe2 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -68,6 +68,8 @@ void registerDataTypeObject(DataTypeFactory & factory) "json", [] { return std::make_shared("json", false); }, DataTypeFactory::CaseInsensitive); /// factory.registerSimpleDataType("nullable_json", [] { return std::make_shared("json", true); }, DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("JSON", "json"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 0bedb7d2c52..aebb89aaa67 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -39,7 +39,7 @@ SerializationPtr DataTypeString::doGetDefaultSerialization() const return std::make_shared(); } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (arguments && !arguments->children.empty()) { @@ -94,7 +94,8 @@ void registerDataTypeString(DataTypeFactory & factory) /// factory.registerAlias("VARBINARY", "string", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("GEOMETRY", "string", DataTypeFactory::CaseInsensitive); //mysql + /// proton: starts factory.registerClickHouseAlias("String", "string"); - // factory.registerClickHouseAlias("VARCHAR", "VARCHAR"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index edad7ec596b..25ad0d8919f 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ 
-354,11 +354,11 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com { if (const auto * name_and_type_pair = child->as()) { - nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type, compatible_with_clickhouse)); + nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); names.emplace_back(name_and_type_pair->name); } else - nested_types.emplace_back(DataTypeFactory::instance().get(child, compatible_with_clickhouse)); + nested_types.emplace_back(DataTypeFactory::instance().get(child/* proton: starts */, compatible_with_clickhouse/* proton: ends */)); } if (names.empty()) @@ -374,7 +374,9 @@ void registerDataTypeTuple(DataTypeFactory & factory) { factory.registerDataType("tuple", create); + /// proton: starts factory.registerClickHouseAlias("Tuple", "tuple"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypeUUID.cpp b/src/DataTypes/DataTypeUUID.cpp index d47eb6932b2..d87783bb01c 100644 --- a/src/DataTypes/DataTypeUUID.cpp +++ b/src/DataTypes/DataTypeUUID.cpp @@ -30,7 +30,9 @@ void registerDataTypeUUID(DataTypeFactory & factory) { factory.registerSimpleDataType("uuid", [] { return DataTypePtr(std::make_shared()); }); + /// proton: starts factory.registerClickHouseAlias("UUID", "uuid"); + /// proton: ends } } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 3064ab9f22c..d9a05a9ce20 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -65,7 +65,7 @@ SerializationPtr DataTypeDecimal::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || 
arguments->children.size() != 2) throw Exception("The decimal data type family must have exactly two arguments: precision and scale", @@ -85,7 +85,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com } template -static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.size() != 1) throw Exception("The decimal data type family must have exactly two arguments: precision and scale", @@ -114,12 +114,14 @@ void registerDataTypeDecimal(DataTypeFactory & factory) /// factory.registerAlias("NUMERIC", "decimal", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("FIXED", "decimal", DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("Decimal32", "decimal32"); factory.registerClickHouseAlias("Decimal64", "decimal64"); factory.registerClickHouseAlias("Decimal128", "decimal128"); factory.registerClickHouseAlias("Decimal256", "decimal256"); factory.registerClickHouseAlias("Decimal", "decimal"); + /// proton: ends } /// Explicit template instantiations. 
diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index e5a64e3924b..2f5263bd64d 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes } template -static DataTypePtr createNumericDataType(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr createNumericDataType(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (arguments) { @@ -91,6 +91,7 @@ void registerDataTypeNumbers(DataTypeFactory & factory) /// factory.registerAlias("YEAR", "uint16", DataTypeFactory::CaseInsensitive); /// factory.registerAlias("TIME", "int64", DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("UInt8", "uint8"); factory.registerClickHouseAlias("UInt16", "uint16"); factory.registerClickHouseAlias("UInt32", "uint32"); @@ -118,6 +119,7 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerClickHouseAlias("BIGINT", "bigint"); factory.registerClickHouseAlias("FLOAT", "float"); factory.registerClickHouseAlias("DOUBLE", "double"); + /// proton: ends } } diff --git a/src/DataTypes/IDataTypeTranslator.h b/src/DataTypes/IDataTypeTranslator.h deleted file mode 100644 index 0c897cbc7b0..00000000000 --- a/src/DataTypes/IDataTypeTranslator.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -namespace DB -{ - -class IDataTypeTranslator -{ -public: - virtual ~IDataTypeTranslator() = default; - - virtual std::string translate(const std::string & type_name) = 0; -}; - -} diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 4da3db21979..7a77f84a58e 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -58,7 +58,7 @@ getArgument(const ASTPtr & arguments, size_t argument_index, const 
char * argume return argument->value.get(); } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]]/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse/* proton: ends */) { if (!arguments || arguments->children.empty()) return std::make_shared(); @@ -77,7 +77,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool com return std::make_shared(timezone.value_or(String{})); } -static DataTypePtr create32(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr create32(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.empty()) return std::make_shared(); @@ -90,7 +90,7 @@ static DataTypePtr create32(const ASTPtr & arguments/* proton: starts */, bool c return std::make_shared(timezone); } -static DataTypePtr create64(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse [[maybe_unused]] = false/* proton: ends */) +static DataTypePtr create64(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) { if (!arguments || arguments->children.empty()) return std::make_shared(DataTypeDateTime64::default_scale); @@ -112,9 +112,11 @@ void registerDataTypeDateTime(DataTypeFactory & factory) /// factory.registerAlias("TIMESTAMP", "datetime", DataTypeFactory::CaseInsensitive); + /// proton: starts factory.registerClickHouseAlias("Datetime", "datetime"); factory.registerClickHouseAlias("Datetime32", "datetime32"); factory.registerClickHouseAlias("Datetime64", "datetime64"); + /// proton: ends } } diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 0afa1a4b625..60ea9ae1646 100644 --- 
a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 554b464197b..831898437c1 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -4,6 +4,7 @@ #include #include #include +#include "Storages/ExternalTable/ExternalTableFactory.h" namespace DB { @@ -11,7 +12,7 @@ namespace DB namespace ExternalTable { -ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings, ContextPtr & /*context*/) +ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings) : table(settings->table.changed ? settings->table.value : name) , logger(&Poco::Logger::get("ExternalTable-ClickHouse-" + table)) { @@ -110,4 +111,12 @@ ColumnsDescription ClickHouse::getTableStructure() } +void registerClickHouseExternalTable(ExternalTableFactory & factory) +{ + factory.registerExternalTable("clickhouse", [](const String & name, ExternalTableSettingsPtr settings) + { + return std::make_unique(name, std::move(settings)); + }); +} + } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h index 8b7b0c2f7e5..d4324f4b518 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include namespace DB @@ -13,7 +13,7 @@ namespace ExternalTable class ClickHouse final : public IExternalTable { public: - explicit ClickHouse(const String & name, ExternalTableSettingsPtr settings, ContextPtr & context [[maybe_unused]]); + explicit ClickHouse(const String & name, ExternalTableSettingsPtr settings); void startup() override; void shutdown() override {} diff --git a/src/Storages/ExternalTable/ExternalTableFactory.cpp 
b/src/Storages/ExternalTable/ExternalTableFactory.cpp index dcc5bba72d8..67289211b87 100644 --- a/src/Storages/ExternalTable/ExternalTableFactory.cpp +++ b/src/Storages/ExternalTable/ExternalTableFactory.cpp @@ -8,13 +8,15 @@ namespace ErrorCodes extern const int UNKNOWN_TYPE; } +void registerClickHouseExternalTable(ExternalTableFactory & factory); + ExternalTableFactory & ExternalTableFactory::instance() { static DB::ExternalTableFactory ret; return ret; } -void ExternalTableFactory::registerExternalTable(const std::string & type, Creator creator) +void ExternalTableFactory::registerExternalTable(const String & type, Creator creator) { if (creators.contains(type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "ExternalTableFactory: type {} is already registered", type); @@ -22,12 +24,18 @@ void ExternalTableFactory::registerExternalTable(const std::string & type, Creat creators[type] = std::move(creator); } -IExternalTablePtr ExternalTableFactory::getExternalTable(const std::string & type, ExternalTableSettingsPtr settings) const +IExternalTablePtr ExternalTableFactory::getExternalTable(const String & name, ExternalTableSettingsPtr settings) const { + auto type = settings->type.value; if (!creators.contains(type)) throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown external table type {}", type); - return creators.at(type)(std::move(settings)); + return creators.at(type)(name, std::move(settings)); +} + +ExternalTableFactory::ExternalTableFactory() +{ + registerClickHouseExternalTable(*this); } } diff --git a/src/Storages/ExternalTable/ExternalTableFactory.h b/src/Storages/ExternalTable/ExternalTableFactory.h index e8cc52781ac..da925003a90 100644 --- a/src/Storages/ExternalTable/ExternalTableFactory.h +++ b/src/Storages/ExternalTable/ExternalTableFactory.h @@ -1,7 +1,7 @@ #pragma once #include -#include "Storages/ExternalTable/ExternalTableImpl.h" +#include "Storages/ExternalTable/IExternalTable.h" #include "Storages/ExternalTable/ExternalTableSettings.h" namespace DB 
@@ -13,12 +13,14 @@ class ExternalTableFactory final : private boost::noncopyable public: static ExternalTableFactory & instance(); - using Creator = std::function; + using Creator = std::function; - IExternalTablePtr getExternalTable(const std::string & type, ExternalTableSettingsPtr settings) const; - void registerExternalTable(const std::string & type, Creator creator); + IExternalTablePtr getExternalTable(const String & name, ExternalTableSettingsPtr settings) const; + void registerExternalTable(const String & type, Creator creator); private: + ExternalTableFactory(); + std::unordered_map creators; }; diff --git a/src/Storages/ExternalTable/ExternalTableImpl.h b/src/Storages/ExternalTable/IExternalTable.h similarity index 100% rename from src/Storages/ExternalTable/ExternalTableImpl.h rename to src/Storages/ExternalTable/IExternalTable.h diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index 450cb03ca49..62b3c9c799c 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -2,6 +2,7 @@ #include #include #include +#include "Storages/ExternalTable/ExternalTableFactory.h" namespace DB { @@ -12,14 +13,7 @@ StorageExternalTable::StorageExternalTable( : IStorage(args.table_id) , WithContext(args.getContext()->getGlobalContext()) { - auto type = settings->type.value; - if (type == "clickhouse") - { - auto ctx = getContext(); - external_table = std::make_unique(args.table_id.getTableName(), std::move(settings), ctx); - } - else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown external table type: {}", type); + external_table = ExternalTableFactory::instance().getExternalTable(args.table_id.getTableName(), std::move(settings)); setStorageMetadata(args); } diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index 7ece55a72bf..7dee5a7fea7 100644 --- 
a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include From 62c0436e96050ef94cfe91004622cada28115851 Mon Sep 17 00:00:00 2001 From: Jove Zhong Date: Thu, 1 Feb 2024 10:54:36 -0800 Subject: [PATCH 23/26] add example for clickhouse external table --- examples/README.md | 6 +- examples/clickhouse/README.md | 76 ++++++++++++++++++++++++++ examples/clickhouse/docker-compose.yml | 54 ++++++++++++++++++ 3 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 examples/clickhouse/README.md create mode 100644 examples/clickhouse/docker-compose.yml diff --git a/examples/README.md b/examples/README.md index f56c56c0ea9..703cc9ff0ab 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,12 +8,14 @@ This folder lists some examples to run Proton in various use cases. For more rea - cdc: demonstrates how to use Debezium to sync database changes from MySQL to Proton, via Redpanda and show live updates(UPSERT and DELETE) in Proton via changelog stream. +- clickhouse: demonstrates how to read from ClickHouse or write to ClickHouse with the new External Table feature. + - ecommerce: a combination of Proton, Redpanda, owl-shop and Redpanda Console. Owl Shop is an imaginary ecommerce shop that simulates microservices exchanging data via Apache Kafka. Sample data streams are: clickstreams(frontend events), customer info, customer orders. [Learn more](https://docs.timeplus.com/proton-kafka#tutorial) - fraud_detection: demonstrates how to leverage proton to build a real-time fraud detection where proton is used as a real-time feature store. -- hackernews: just two containers: Proton and [a bytewax-based data loader](https://github.com/timeplus-io/proton-python-driver/tree/develop/example/bytewax). 
Inspired by https://bytewax.io/blog/polling-hacker-news, you can call Hacker News HTTP API with Bytewax and send latest news to Proton for SQL-based analysis. - - grafana: an example of how to use Grafana to connect to Proton and visualize the query results. +- hackernews: just two containers: Proton and [a bytewax-based data loader](https://github.com/timeplus-io/proton-python-driver/tree/develop/example/bytewax). Inspired by https://bytewax.io/blog/polling-hacker-news, you can call Hacker News HTTP API with Bytewax and send latest news to Proton for SQL-based analysis. + - jdbc: demonstrates how to connect to Proton via JDBC using DBeaver or Metabase. \ No newline at end of file diff --git a/examples/clickhouse/README.md b/examples/clickhouse/README.md new file mode 100644 index 00000000000..c47f334f21b --- /dev/null +++ b/examples/clickhouse/README.md @@ -0,0 +1,76 @@ +# Demo for ClickHouse External Table + +This docker compose file demonstrates how to read from ClickHouse or write to ClickHouse with the new [External Table](https://docs.timeplus.com/proton-clickhouse-external-table) feature. + +A YouTube video tutorial is available for visual learners: TBD + +## Start the example + +Simply run `docker compose up` in this folder. The stack contains five docker containers: + +1. ghcr.io/timeplus-io/proton:latest, as the streaming SQL engine. +2. clickhouse/clickhouse-server:latest, as the ClickHouse server. +3. quay.io/cloudhut/owl-shop:latest, as the data generator. [Owl Shop](https://github.com/cloudhut/owl-shop) is an imaginary ecommerce shop that simulates microservices exchanging data via Apache Kafka. +4. docker.redpanda.com/redpandadata/redpanda, as the Kafka-compatible streaming message bus +5. docker.redpanda.com/redpandadata/console, as the web UI to explore data in Kafka/Redpanda + +When all containers are up and running, a few topics will be created in Redpanda with live demo data.
+ +## Read data from Redpanda, apply ETL and write to ClickHouse +Open the `proton client` in the proton container. Run the following SQL to create an external stream to read live data from Redpanda. + +```sql +CREATE EXTERNAL STREAM frontend_events(raw string) +SETTINGS type='kafka', + brokers='redpanda:9092', + topic='owlshop-frontend-events'; +``` + +Open the `clickhouse client` in the clickhouse container. Run the following SQL to create a regular MergeTree table. + +```sql +CREATE TABLE events +( + _tp_time DateTime64(3), + url String, + method String, + ip String +) +ENGINE=MergeTree() +PRIMARY KEY (_tp_time, url); +``` + +Go back to `proton client` and run the following SQL to create an external table to connect to ClickHouse: +```sql +CREATE EXTERNAL TABLE ch_local +SETTINGS type='clickhouse', + address='clickhouse:9000', + table='events'; +``` + +Then create a materialized view to read data from Redpanda, extract the values, mask the IP address as an MD5 hash, and send the data to the external table. By doing so, the transformed data will be written to ClickHouse continuously. + +```sql +CREATE MATERIALIZED VIEW mv INTO ch_local AS + SELECT now64() AS _tp_time, + raw:requestedUrl AS url, + raw:method AS method, + lower(hex(md5(raw:ipAddress))) AS ip + FROM frontend_events; +``` + +## Read data from ClickHouse + +You can run the following SQL to query ClickHouse: + +```sql +SELECT * FROM ch_local; +``` + +Or apply SQL functions or GROUP BY, such as: + +```sql +SELECT method, count() AS cnt FROM ch_local GROUP BY method +``` + +Please note, Proton will read all rows with the selected columns from ClickHouse and apply aggregation locally. Check the [External Table](https://docs.timeplus.com/proton-clickhouse-external-table) documentation for details.
\ No newline at end of file diff --git a/examples/clickhouse/docker-compose.yml b/examples/clickhouse/docker-compose.yml new file mode 100644 index 00000000000..76b75cce0a8 --- /dev/null +++ b/examples/clickhouse/docker-compose.yml @@ -0,0 +1,54 @@ +version: '3.7' +name: proton-ch-demo +volumes: + redpanda: null +services: + proton: + image: ghcr.io/timeplus-io/proton:latest + pull_policy: always + + clickhouse: + image: clickhouse/clickhouse-server:latest + ports: + - 9000:9000 + ulimits: + nofile: + soft: 262144 + hard: 262144 + + redpanda: + image: docker.redpanda.com/redpandadata/redpanda:v23.2.15 + command: + - redpanda start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + - --advertise-kafka-addr internal://redpanda:9092,external://localhost:19092 + - --smp 1 + - --memory 1G + - --mode dev-container + volumes: + - redpanda:/var/lib/redpanda/data + + redpanda-console: + image: docker.redpanda.com/redpandadata/console:v2.3.5 + entrypoint: /bin/sh + command: -c "echo \"$$CONSOLE_CONFIG_FILE\" > /tmp/config.yml; /app/console" + environment: + CONFIG_FILEPATH: /tmp/config.yml + CONSOLE_CONFIG_FILE: | + kafka: + brokers: ["redpanda:9092"] + ports: + - 8080:8080 + depends_on: + - redpanda + + owl-shop: + image: quay.io/cloudhut/owl-shop:latest + #platform: 'linux/amd64' + environment: + - SHOP_KAFKA_BROKERS=redpanda:9092 + - SHOP_KAFKA_TOPICREPLICATIONFACTOR=1 + - SHOP_TRAFFIC_INTERVAL_RATE=1 + - SHOP_TRAFFIC_INTERVAL_DURATION=0.1s + depends_on: + - redpanda From 53bc14a7c53505677145ab54216c6f1c76fb7246 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Thu, 1 Feb 2024 09:45:08 -0800 Subject: [PATCH 24/26] clean up --- src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index c756d717694..c2d1a62072c 100644 --- 
a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -75,9 +75,7 @@ void ClickHouseSink::consume(Chunk chunk) auto block = getHeader().cloneWithColumns(chunk.detachColumns()); output_format->write(block); - String query_to_sent {buf->str()}; - // conn->forceConnected(params.timeouts); /// The connection chould have been idle for too long - client->executeInsertQuery(query_to_sent); + client->executeInsertQuery(buf->str()); client->throwServerExceptionIfAny(); } From 67e159bb7f525a633c07f85189ff2fb1cb3d1ad8 Mon Sep 17 00:00:00 2001 From: Gimi Liang Date: Thu, 1 Feb 2024 10:55:50 -0800 Subject: [PATCH 25/26] fixed issues found when doing tests. --- src/Client/ClickHouseClient.cpp | 18 +++++++--- src/Client/ClickHouseClient.h | 2 +- .../ExternalTable/ClickHouse/ClickHouse.cpp | 11 +++--- .../ExternalTable/ClickHouse/ClickHouse.h | 1 + .../ClickHouse/ClickHouseSink.cpp | 8 +++-- .../ExternalTable/ClickHouse/ClickHouseSink.h | 1 + .../ClickHouse/ClickHouseSource.cpp | 7 ++-- .../ClickHouse/ClickHouseSource.h | 1 + .../ExternalTable/StorageExternalTable.cpp | 34 ++++++++++++++++--- .../ExternalTable/StorageExternalTable.h | 1 + 10 files changed, 65 insertions(+), 19 deletions(-) diff --git a/src/Client/ClickHouseClient.cpp b/src/Client/ClickHouseClient.cpp index a73840d1399..e01f7478998 100644 --- a/src/Client/ClickHouseClient.cpp +++ b/src/Client/ClickHouseClient.cpp @@ -59,13 +59,14 @@ void ClickHouseClient::reset() server_exception = nullptr; } -void ClickHouseClient::executeQuery(const String & query, const String & query_id) +void ClickHouseClient::executeQuery(const String & query, const String & query_id, bool fail_quick) { assert(!has_running_query); has_running_query = true; reset(); + bool suppress_error_log {false}; while (true) { try @@ -84,24 +85,33 @@ void ClickHouseClient::executeQuery(const String & query, const String & query_i } catch (const Exception & e) { + if (fail_quick) + 
e.rethrow(); + /// connection lost if (!connection->checkConnected()) { - LOG_ERROR(logger, "Connection lost"); + if (!suppress_error_log) + LOG_ERROR(logger, "Connection lost"); /// set the connection not connected so that sendQuery will reconnect connection->disconnect(); + std::this_thread::sleep_for(std::chrono::seconds(2)); } /// Retry when the server said "Client should retry" and no rows has been received yet. else if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED) { - LOG_ERROR(logger, "Got a transient error from the server, will retry in 1 second"); + if (!suppress_error_log) + LOG_ERROR(logger, "Got a transient error from the server, will retry in 1 second"); std::this_thread::sleep_for(std::chrono::seconds(1)); } else { has_running_query = false; - throw; + e.rethrow(); } + + /// Otherwise, it will keep generating the same error log again and again until the connection is back. + suppress_error_log = true; } } } diff --git a/src/Client/ClickHouseClient.h b/src/Client/ClickHouseClient.h index 1db3260217c..2ed7b200fcc 100644 --- a/src/Client/ClickHouseClient.h +++ b/src/Client/ClickHouseClient.h @@ -19,7 +19,7 @@ class ClickHouseClient final /// Sends the query to the server to execute. For insert queries, use `executeInsertQuery` instead. /// Make sure keep calling the `pollData` method until it returns an empty optional, until which the /// client won't be able to execute another query. - void executeQuery(const String & query, const String & query_id = ""); + void executeQuery(const String & query, const String & query_id = "", bool fail_quick = false); /// Sends an insert query to the server to execute. The difference between this and executeQuery is that, /// after calling this method, there is no need to call the `pollData` method. 
void executeInsertQuery(const String & query, const String & query_id = ""); diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp index 831898437c1..dab21acc242 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp @@ -13,7 +13,8 @@ namespace ExternalTable { ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings) - : table(settings->table.changed ? settings->table.value : name) + : database(settings->database.value) + , table(settings->table.changed ? settings->table.value : name) , logger(&Poco::Logger::get("ExternalTable-ClickHouse-" + table)) { assert(settings->type.value == "clickhouse"); @@ -58,13 +59,13 @@ Pipe ClickHouse::read( { auto header = storage_snapshot->getSampleBlockForColumns(column_names); auto client = std::make_unique(connection_params, logger); - auto source = std::make_shared(table, std::move(header), std::move(client), processed_stage, context, logger); + auto source = std::make_shared(database, table, std::move(header), std::move(client), processed_stage, context, logger); return Pipe(std::move(source)); } SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { - return std::make_shared(table, metadata_snapshot->getSampleBlock(), connection_params, context, logger); + return std::make_shared(database, table, metadata_snapshot->getSampleBlock(), connection_params, context, logger); } ColumnsDescription ClickHouse::getTableStructure() @@ -72,7 +73,9 @@ ColumnsDescription ClickHouse::getTableStructure() ColumnsDescription ret {}; ClickHouseClient client {connection_params, logger}; - client.executeQuery("DESCRIBE TABLE " + table); + auto query = "DESCRIBE TABLE " + (database.empty() ? 
"" : backQuoteIfNeed(database) + ".") + backQuoteIfNeed(table); + /// This has to fail quickly otherwise it will block Proton from starting. + client.executeQuery(query, /*query_id=*/"", /*fail_quick=*/true); LOG_INFO(logger, "Receiving table schema"); while (true) { diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h index d4324f4b518..adfa798ac34 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouse.h @@ -33,6 +33,7 @@ class ClickHouse final : public IExternalTable private: ConnectionParameters connection_params; + String database; String table; Poco::Logger * logger; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp index c2d1a62072c..42b6b80b998 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp @@ -14,12 +14,12 @@ namespace ExternalTable namespace { -String constructInsertQuery(const String & table, const Block & header) +String constructInsertQuery(const String & database, const String & table, const Block & header) { assert(header.columns()); const auto & col_names = header.getNames(); - auto query = "INSERT INTO " + backQuoteIfNeed(table) + " (" + backQuoteIfNeed(col_names[0]); + auto query = "INSERT INTO " + (database.empty() ? 
"" : backQuoteIfNeed(database) + ".") + backQuoteIfNeed(table) + " (" + backQuoteIfNeed(col_names[0]); for (const auto & name : std::vector(std::next(col_names.begin()), col_names.end())) query.append(", " + backQuoteIfNeed(name)); query.append(") VALUES "); @@ -30,13 +30,15 @@ String constructInsertQuery(const String & table, const Block & header) } ClickHouseSink::ClickHouseSink( + +const String & database, const String & table, const Block & header, const ConnectionParameters & params_, ContextPtr context_, Poco::Logger * logger_) : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) - , insert_into(constructInsertQuery(table, header)) + , insert_into(constructInsertQuery(database, table, header)) , client(std::make_unique(params_, logger_)) , context(context_) , logger(logger_) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h index 9ec7bdb15bc..ce7a178504f 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h @@ -14,6 +14,7 @@ class ClickHouseSink final : public SinkToStorage { public: ClickHouseSink( + const String & database, const String & table, const Block & header, const ConnectionParameters & params_, diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp index 9718d09d59c..0dd1d066794 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp @@ -9,7 +9,7 @@ namespace ExternalTable namespace { -String constructSelectQuery(const String & table, const Block & header) +String constructSelectQuery(const String & database, const String & table, const Block & header) { assert(header.columns()); const auto & col_names = header.getNames(); @@ -17,7 +17,7 @@ String constructSelectQuery(const String & table, const Block & header) auto query = "SELECT " + 
backQuoteIfNeed(col_names[0]); for (const auto & name : std::vector(std::next(col_names.begin()), col_names.end())) query.append(", " + backQuoteIfNeed(name)); - query.append(" FROM " + table); + query.append(" FROM " + (database.empty() ? "" : backQuoteIfNeed(database) + ".") + table); return query; } @@ -25,6 +25,7 @@ String constructSelectQuery(const String & table, const Block & header) } ClickHouseSource::ClickHouseSource( + const String & database, const String & table, const Block & header, std::unique_ptr client_, @@ -33,7 +34,7 @@ ClickHouseSource::ClickHouseSource( Poco::Logger * logger_) : ISource(header, true, ProcessorID::ClickHouseSourceID) , client(std::move(client_)) - , query(constructSelectQuery(table, header)) + , query(constructSelectQuery(database, table, header)) , context(context_) , logger(logger_) { diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h index 0f0f7d898f3..13169a68978 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h +++ b/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h @@ -13,6 +13,7 @@ class ClickHouseSource final : public ISource { public: ClickHouseSource( + const String & database, const String & table, const Block & header, std::unique_ptr client_, diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index 62b3c9c799c..ecd5665d8bc 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -1,8 +1,9 @@ +#include #include #include -#include #include -#include "Storages/ExternalTable/ExternalTableFactory.h" +#include +#include namespace DB { @@ -15,7 +16,33 @@ StorageExternalTable::StorageExternalTable( { external_table = ExternalTableFactory::instance().getExternalTable(args.table_id.getTableName(), std::move(settings)); - setStorageMetadata(args); + /// First, setStorageMetadata should be 
allowed to fail (the only failable part is getTableStructure function call), otherwise it will block Proton from starting up. + /// Second, when it fails, the exception should be caught, otherwise, Proton will fail to start. + /// TODO we could use cache to save the table structure, so that when Proton restarts it could read from the cache directly. + try + { + setStorageMetadata(args); + } + catch (const Exception & e) + { + LOG_ERROR(&Poco::Logger::get("ExternalTable-ClickHouse" + args.table_id.getFullTableName()), + "Failed to fetch table structure, error: {}. Will keep retrying in background", e.what()); + background_jobs.scheduleOrThrowOnError([this](){ + while (!is_dropped) + { + try + { + std::this_thread::sleep_for(std::chrono::seconds(5)); + auto metadata = getInMemoryMetadata(); + metadata.setColumns(external_table->getTableStructure()); + setInMemoryMetadata(metadata); + break; + } + catch (const Exception &) { } + } + }); + } + } Pipe StorageExternalTable::read( @@ -42,7 +69,6 @@ void StorageExternalTable::setStorageMetadata(const StorageFactory::Arguments & { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(external_table->getTableStructure()); - storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index 7dee5a7fea7..f5c8d1e86a3 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -46,6 +46,7 @@ class StorageExternalTable final : public shared_ptr_helper Date: Sat, 3 Feb 2024 02:07:43 -0800 Subject: [PATCH 26/26] PR comments --- src/CMakeLists.txt | 3 + src/ClickHouse/CMakeLists.txt | 3 + .../Client.cpp} | 59 +++++++----------- .../Client.h} | 22 ++++--- .../Sink.cpp} | 26 ++++---- .../ClickHouseSink.h => ClickHouse/Sink.h} | 12 ++-- .../Source.cpp} | 20 ++---- 
.../Source.h} | 17 +++-- src/Client/CMakeLists.txt | 2 +- src/DataTypes/DataTypeAggregateFunction.cpp | 2 +- src/DataTypes/DataTypeArray.cpp | 2 +- .../DataTypeCustomSimpleAggregateFunction.cpp | 2 +- src/DataTypes/DataTypeEnum.cpp | 4 +- src/DataTypes/DataTypeFactory.h | 6 +- src/DataTypes/DataTypeFixedString.cpp | 2 +- src/DataTypes/DataTypeLowCardinality.cpp | 2 +- src/DataTypes/DataTypeMap.cpp | 2 +- src/DataTypes/DataTypeNested.cpp | 2 +- src/DataTypes/DataTypeNullable.cpp | 2 +- src/DataTypes/DataTypeString.cpp | 2 +- src/DataTypes/DataTypeTuple.cpp | 2 +- src/DataTypes/DataTypesDecimal.cpp | 4 +- src/DataTypes/registerDataTypeDateTime.cpp | 4 +- src/Databases/DatabaseOnDisk.cpp | 13 ---- src/Interpreters/InterpreterDropQuery.cpp | 2 +- src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserQueryWithOutput.cpp | 5 -- src/Storages/ExternalTable/CMakeLists.txt | 1 - .../{ClickHouse => }/ClickHouse.cpp | 62 +++++++++++-------- .../{ClickHouse => }/ClickHouse.h | 4 +- .../ExternalTable/ExternalTableSettings.h | 3 +- .../ExternalTable/StorageExternalTable.cpp | 58 ++++++++++------- .../ExternalTable/StorageExternalTable.h | 8 ++- 33 files changed, 178 insertions(+), 183 deletions(-) create mode 100644 src/ClickHouse/CMakeLists.txt rename src/{Client/ClickHouseClient.cpp => ClickHouse/Client.cpp} (81%) rename src/{Client/ClickHouseClient.h => ClickHouse/Client.h} (71%) rename src/{Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp => ClickHouse/Sink.cpp} (77%) rename src/{Storages/ExternalTable/ClickHouse/ClickHouseSink.h => ClickHouse/Sink.h} (71%) rename src/{Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp => ClickHouse/Source.cpp} (71%) rename src/{Storages/ExternalTable/ClickHouse/ClickHouseSource.h => ClickHouse/Source.h} (50%) rename src/Storages/ExternalTable/{ClickHouse => }/ClickHouse.cpp (63%) rename src/Storages/ExternalTable/{ClickHouse => }/ClickHouse.h (91%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 
5d061ec3cdb..44a21789ffe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -247,6 +247,9 @@ else() message(FATAL "rdkafka is not enabled which is required") endif() +add_subdirectory(ClickHouse) +add_object_library(clickhouse_clickhouse ClickHouse) + add_subdirectory(Storages/ExternalStream) add_subdirectory(Storages/ExternalTable) # proton: end diff --git a/src/ClickHouse/CMakeLists.txt b/src/ClickHouse/CMakeLists.txt new file mode 100644 index 00000000000..3ca0cf6c964 --- /dev/null +++ b/src/ClickHouse/CMakeLists.txt @@ -0,0 +1,3 @@ +# if (ENABLE_TESTS) +# add_subdirectory(tests) +# endif () diff --git a/src/Client/ClickHouseClient.cpp b/src/ClickHouse/Client.cpp similarity index 81% rename from src/Client/ClickHouseClient.cpp rename to src/ClickHouse/Client.cpp index e01f7478998..c04b5bd0237 100644 --- a/src/Client/ClickHouseClient.cpp +++ b/src/ClickHouse/Client.cpp @@ -1,5 +1,5 @@ +#include #include -#include namespace DB { @@ -12,27 +12,11 @@ extern const int UNKNOWN_PACKET_FROM_SERVER; extern const int UNEXPECTED_PACKET_FROM_SERVER; } -namespace +namespace ClickHouse { -std::unique_ptr createConnection(const ConnectionParameters & parameters) +namespace { - auto ret = std::make_unique( - parameters.host, - parameters.port, - parameters.default_database, - parameters.user, - parameters.password, - parameters.quota_key, - "", /* cluster */ - "", /* cluster_secret */ - "TimeplusProton", - parameters.compression, - parameters.security); - - ret->setCompatibleWithClickHouse(); - return ret; -} size_t calculatePollInterval(const ConnectionTimeouts & timeouts) { @@ -44,22 +28,23 @@ size_t calculatePollInterval(const ConnectionTimeouts & timeouts) } -ClickHouseClient::ClickHouseClient(ConnectionParameters params_, Poco::Logger * logger_) - : params(params_) - , connection(createConnection(params)) - , poll_interval(calculatePollInterval(params.timeouts)) +Client::Client(DB::ConnectionPool::Entry connection_, ConnectionTimeouts timeouts_, Poco::Logger * 
logger_) + : connection(std::move(connection_)) + , timeouts(std::move(timeouts_)) + , poll_interval(calculatePollInterval(timeouts)) , logger(logger_) { + connection->setCompatibleWithClickHouse(); } -void ClickHouseClient::reset() +void Client::reset() { cancelled = false; processed_rows = 0; server_exception = nullptr; } -void ClickHouseClient::executeQuery(const String & query, const String & query_id, bool fail_quick) +void Client::executeQuery(const String & query, const String & query_id, bool fail_quick) { assert(!has_running_query); has_running_query = true; @@ -72,7 +57,7 @@ void ClickHouseClient::executeQuery(const String & query, const String & query_i try { connection->sendQuery( - params.timeouts, + timeouts, query, {} /*query_parameters*/, query_id, @@ -116,13 +101,13 @@ void ClickHouseClient::executeQuery(const String & query, const String & query_i } } -void ClickHouseClient::executeInsertQuery(const String & query, const String & query_id) +void Client::executeInsertQuery(const String & query, const String & query_id) { executeQuery(query, query_id); receiveEndOfQuery(); } -std::optional ClickHouseClient::pollData() +std::optional Client::pollData() { if (!has_running_query) return std::nullopt; @@ -136,11 +121,11 @@ std::optional ClickHouseClient::pollData() if (!cancelled) { double elapsed = receive_watch.elapsedSeconds(); - if (elapsed > params.timeouts.receive_timeout.totalSeconds()) + if (elapsed > timeouts.receive_timeout.totalSeconds()) { cancelQuery(); - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded while receiving data from server. Waited for {} seconds, timeout is {} seconds", static_cast(elapsed), params.timeouts.receive_timeout.totalSeconds()); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded while receiving data from server. 
Waited for {} seconds, timeout is {} seconds", static_cast(elapsed), timeouts.receive_timeout.totalSeconds()); } } @@ -162,7 +147,7 @@ std::optional ClickHouseClient::pollData() } } -void ClickHouseClient::cancelQuery() +void Client::cancelQuery() { if (!has_running_query) return; @@ -175,7 +160,7 @@ void ClickHouseClient::cancelQuery() /// Receive a part of the result, or progress info or an exception and process it. /// Returns true if one should continue receiving packets. -bool ClickHouseClient::receiveAndProcessPacket() +bool Client::receiveAndProcessPacket() { assert(has_running_query); @@ -230,7 +215,7 @@ bool ClickHouseClient::receiveAndProcessPacket() } /// Process Log packets, exit when receive Exception or EndOfStream -bool ClickHouseClient::receiveEndOfQuery() +bool Client::receiveEndOfQuery() { while (true) { @@ -266,21 +251,23 @@ bool ClickHouseClient::receiveEndOfQuery() } } -void ClickHouseClient::onEndOfStream() +void Client::onEndOfStream() { has_running_query = false; } -void ClickHouseClient::onServerException(std::unique_ptr && exception) +void Client::onServerException(std::unique_ptr && exception) { server_exception.swap(exception); has_running_query = false; } -void ClickHouseClient::throwServerExceptionIfAny() +void Client::throwServerExceptionIfAny() { if (server_exception) server_exception->rethrow(); } } + +} diff --git a/src/Client/ClickHouseClient.h b/src/ClickHouse/Client.h similarity index 71% rename from src/Client/ClickHouseClient.h rename to src/ClickHouse/Client.h index 2ed7b200fcc..8cd99bb9c54 100644 --- a/src/Client/ClickHouseClient.h +++ b/src/ClickHouse/Client.h @@ -1,20 +1,22 @@ #pragma once -#include -#include +#include namespace DB { -/// ClickHouseClient is a client that is compatiable with the ClickHouse protocol and can be used to talk to ClickHouse servers. +namespace ClickHouse +{ + +/// This is a client that is compatiable with the ClickHouse protocol and can be used to talk to ClickHouse servers. 
/// Note: -/// * This client is designed for the ClickHouse ExternalTable, so it's not 100% compatiable with ClickHouse protocol, it just needs to make sure the ExternalTable is functional. +/// * This client is designed to be used in the ClickHouse ExternalTable, so it's not 100% compatiable with ClickHouse protocol, it just needs to make sure the ExternalTable is functional. /// * A client object should not be shared with multiple threads. -class ClickHouseClient final +class Client final { public: - ClickHouseClient(ConnectionParameters params_, Poco::Logger * logger_); + Client(DB::ConnectionPool::Entry connection_, ConnectionTimeouts timeouts_, Poco::Logger * logger_); /// Sends the query to the server to execute. For insert queries, use `executeInsertQuery` instead. /// Make sure keep calling the `pollData` method until it returns an empty optional, until which the @@ -40,17 +42,19 @@ class ClickHouseClient final void onEndOfStream(); void onServerException(std::unique_ptr && exception); - ConnectionParameters params; - std::unique_ptr connection; + DB::ConnectionPool::Entry connection; + ConnectionTimeouts timeouts; size_t poll_interval; bool has_running_query {false}; bool cancelled {false}; size_t processed_rows {0}; Block polled_data; - std::unique_ptr server_exception {nullptr}; + std::unique_ptr server_exception; Poco::Logger * logger; }; } + +} diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp b/src/ClickHouse/Sink.cpp similarity index 77% rename from src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp rename to src/ClickHouse/Sink.cpp index 42b6b80b998..087807bc106 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.cpp +++ b/src/ClickHouse/Sink.cpp @@ -1,14 +1,11 @@ -#include +#include #include -#include -#include #include -#include namespace DB { -namespace ExternalTable +namespace ClickHouse { namespace @@ -29,17 +26,16 @@ String constructInsertQuery(const String & database, const String & table, const } 
-ClickHouseSink::ClickHouseSink( - -const String & database, - const String & table, - const Block & header, - const ConnectionParameters & params_, - ContextPtr context_, - Poco::Logger * logger_) +Sink::Sink( + const String & database, + const String & table, + const Block & header, + std::unique_ptr client_, + ContextPtr context_, + Poco::Logger * logger_) : SinkToStorage(header, ProcessorID::ExternalTableDataSinkID) , insert_into(constructInsertQuery(database, table, header)) - , client(std::make_unique(params_, logger_)) + , client(std::move(client_)) , context(context_) , logger(logger_) { @@ -67,7 +63,7 @@ explicit BufferResetter(WriteBufferFromOwnString & buf_): buf(buf_) {} } -void ClickHouseSink::consume(Chunk chunk) +void Sink::consume(Chunk chunk) { if (!chunk.rows()) return; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h b/src/ClickHouse/Sink.h similarity index 71% rename from src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h rename to src/ClickHouse/Sink.h index ce7a178504f..e028617b59b 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSink.h +++ b/src/ClickHouse/Sink.h @@ -1,23 +1,23 @@ #pragma once -#include +#include #include #include namespace DB { -namespace ExternalTable +namespace ClickHouse { -class ClickHouseSink final : public SinkToStorage +class Sink final : public SinkToStorage { public: - ClickHouseSink( + Sink( const String & database, const String & table, const Block & header, - const ConnectionParameters & params_, + std::unique_ptr client_, ContextPtr context_, Poco::Logger * logger_); @@ -28,7 +28,7 @@ class ClickHouseSink final : public SinkToStorage private: String insert_into; - std::unique_ptr client; + std::unique_ptr client; std::unique_ptr buf; OutputFormatPtr output_format; diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp b/src/ClickHouse/Source.cpp similarity index 71% rename from src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp rename to 
src/ClickHouse/Source.cpp index 0dd1d066794..9c80b22fb65 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.cpp +++ b/src/ClickHouse/Source.cpp @@ -1,10 +1,10 @@ +#include #include -#include namespace DB { -namespace ExternalTable +namespace ClickHouse { namespace @@ -24,23 +24,20 @@ String constructSelectQuery(const String & database, const String & table, const } -ClickHouseSource::ClickHouseSource( +Source::Source( const String & database, const String & table, const Block & header, - std::unique_ptr client_, - QueryProcessingStage::Enum /*processed_stage*/, - ContextPtr context_, - Poco::Logger * logger_) + std::unique_ptr client_, + ContextPtr context_) : ISource(header, true, ProcessorID::ClickHouseSourceID) , client(std::move(client_)) , query(constructSelectQuery(database, table, header)) , context(context_) - , logger(logger_) { } -Chunk ClickHouseSource::generate() +Chunk Source::generate() { if (isCancelled()) { @@ -56,16 +53,11 @@ Chunk ClickHouseSource::generate() client->executeQuery(query); } - LOG_INFO(logger, "polling data"); auto block = client->pollData(); client->throwServerExceptionIfAny(); if (!block) - { - LOG_INFO(logger, "no more data"); return {}; - } - LOG_INFO(logger, "received {} rows", block->rows()); return {block->getColumns(), block->rows()}; } diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h b/src/ClickHouse/Source.h similarity index 50% rename from src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h rename to src/ClickHouse/Source.h index 13169a68978..8a4d320c4f9 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouseSource.h +++ b/src/ClickHouse/Source.h @@ -1,25 +1,23 @@ #pragma once -#include +#include #include namespace DB { -namespace ExternalTable +namespace ClickHouse { -class ClickHouseSource final : public ISource +class Source final : public ISource { public: - ClickHouseSource( + Source( const String & database, const String & table, const Block & header, - 
std::unique_ptr client_, - QueryProcessingStage::Enum processed_stage, - ContextPtr context_, - Poco::Logger * logger_); + std::unique_ptr client_, + ContextPtr context_); String getName() const override { return "ClickHouseSource"; } @@ -29,11 +27,10 @@ class ClickHouseSource final : public ISource private: bool started {false}; - std::unique_ptr client; + std::unique_ptr client; String query; ContextPtr context; - Poco::Logger * logger; }; } diff --git a/src/Client/CMakeLists.txt b/src/Client/CMakeLists.txt index 119414a8a70..83bbe418246 100644 --- a/src/Client/CMakeLists.txt +++ b/src/Client/CMakeLists.txt @@ -1,3 +1,3 @@ if (ENABLE_EXAMPLES) add_subdirectory(examples) -endif() \ No newline at end of file +endif() diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 25a6a04c0a6..1c2a676d589 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -159,7 +159,7 @@ SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { String function_name; AggregateFunctionPtr function; diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 9ed7c1fd6ed..9afdb169e74 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -59,7 +59,7 @@ size_t DataTypeArray::getNumberOfDimensions() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 1) throw Exception("array data type family must have exactly one 
argument - type of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 26bd21c1b8b..90cb39ca218 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -68,7 +68,7 @@ String DataTypeCustomSimpleAggregateFunction::getName() const } -static std::pair create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static std::pair create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { String function_name; AggregateFunctionPtr function; diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 69a94b68e0b..dc33957c854 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -239,7 +239,7 @@ static void autoAssignNumberForEnum(const ASTPtr & arguments) } template -static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr createExact(const ASTPtr & arguments, [[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.empty()) throw Exception("Data type enum cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); @@ -279,7 +279,7 @@ static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, [[m return std::make_shared(values); } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.empty()) throw Exception("Data type enum cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); diff --git 
a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 645c9633217..56c82972dc9 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -35,9 +35,9 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli DataTypePtr get(TypeIndex type) const; /// proton: ends. - DataTypePtr get(const String & full_name/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; - DataTypePtr get(const String & family_name, const ASTPtr & parameters/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; - DataTypePtr get(const ASTPtr & ast/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) const; + DataTypePtr get(const String & full_name, bool compatible_with_clickhouse = false) const; /// proton: updated + DataTypePtr get(const String & family_name, const ASTPtr & parameters, bool compatible_with_clickhouse = false) const; /// proton: updated + DataTypePtr get(const ASTPtr & ast, bool compatible_with_clickhouse = false) const; /// proton: updated DataTypePtr getCustom(DataTypeCustomDescPtr customization) const; /// Register a type family by its name. 
diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index d6d95a48e7f..635621c9df0 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -44,7 +44,7 @@ SerializationPtr DataTypeFixedString::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, [[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 1) throw Exception("The fixed_string data type family must have exactly one argument - size in bytes", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index 34cc83e5c49..95323b30331 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -150,7 +150,7 @@ SerializationPtr DataTypeLowCardinality::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 1) throw Exception("The low_cardinality data type family must have single argument - type of elements", diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index f549471fa09..6face18f595 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -127,7 +127,7 @@ bool DataTypeMap::checkKeyType(DataTypePtr key_type) return true; } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool 
compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 2) throw Exception("The map data type family must have two arguments: key and value types", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/DataTypes/DataTypeNested.cpp b/src/DataTypes/DataTypeNested.cpp index 86b4e1588fd..0d69396ac26 100644 --- a/src/DataTypes/DataTypeNested.cpp +++ b/src/DataTypes/DataTypeNested.cpp @@ -32,7 +32,7 @@ String DataTypeNestedCustomName::getName() const return s.str(); } -static std::pair create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static std::pair create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.empty()) throw Exception("The nested cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 439b10bc9cc..574900d76ca 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -61,7 +61,7 @@ SerializationPtr DataTypeNullable::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 1) throw Exception("Nullable data type family must have exactly one argument - nested type", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index aebb89aaa67..29283806167 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -39,7 +39,7 @@ SerializationPtr DataTypeString::doGetDefaultSerialization() const return std::make_shared(); } -static DataTypePtr create(const ASTPtr & arguments/* proton: 
starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, [[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (arguments && !arguments->children.empty()) { diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 25ad0d8919f..c063b73f418 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -339,7 +339,7 @@ SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.empty()) throw Exception("The tuple cannot be empty", ErrorCodes::EMPTY_DATA_PASSED); diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index d9a05a9ce20..a65a51f5403 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -65,7 +65,7 @@ SerializationPtr DataTypeDecimal::doGetDefaultSerialization() const } -static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create(const ASTPtr & arguments, [[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 2) throw Exception("The decimal data type family must have exactly two arguments: precision and scale", @@ -85,7 +85,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_ } template -static DataTypePtr createExact(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr createExact(const ASTPtr & arguments, 
[[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.size() != 1) throw Exception("The decimal data type family must have exactly two arguments: precision and scale", diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 7a77f84a58e..d1b4dbb6a4d 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -77,7 +77,7 @@ static DataTypePtr create(const ASTPtr & arguments/* proton: starts */, [[maybe_ return std::make_shared(timezone.value_or(String{})); } -static DataTypePtr create32(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create32(const ASTPtr & arguments, [[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.empty()) return std::make_shared(); @@ -90,7 +90,7 @@ static DataTypePtr create32(const ASTPtr & arguments/* proton: starts */, [[mayb return std::make_shared(timezone); } -static DataTypePtr create64(const ASTPtr & arguments/* proton: starts */, [[maybe_unused]] bool compatible_with_clickhouse = false/* proton: ends */) +static DataTypePtr create64(const ASTPtr & arguments, [[maybe_unused]] bool compatible_with_clickhouse = false) /// proton: updated { if (!arguments || arguments->children.empty()) return std::make_shared(DataTypeDateTime64::default_scale); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 232d885aa16..709b643ba24 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -23,7 +23,6 @@ #include /// proton: starts. -#include #include /// proton: ends. 
@@ -697,18 +696,6 @@ ASTPtr DatabaseOnDisk::parseQueryFromMetadata( auto ast = tryParseQuery(parser, pos, pos + query.size(), error_message, /* hilite = */ false, "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth); - /// proton: starts - if (!ast) - { - ParserCreateExternalTableQuery ex_table_parser; - std::string err_msg; - pos = query.data(); - ast = tryParseQuery(ex_table_parser, pos, pos + query.size(), err_msg, /* hilite = */ false, - "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth); - LOG_ERROR(logger, "Failed to parse {} with CreateExternalTable parser: {}", metadata_file_path, err_msg); - } - /// proton: ends - if (!ast && throw_on_error) throw Exception(error_message, ErrorCodes::SYNTAX_ERROR); else if (!ast) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 72d9622ec14..3aefd1ccfb4 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -144,7 +144,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue { /// proton: starts if (table->isExternalTable()) - throw Exception("Cannot DETACH external table", ErrorCodes::SYNTAX_ERROR); + throw Exception("Cannot DETACH external table", ErrorCodes::NOT_IMPLEMENTED); /// proton: ends context_->checkAccess(drop_storage, table_id); diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 7f4f75933af..0f94a62dbc3 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -19,6 +19,7 @@ #include /// proton: starts +#include #include /// proton: ends. @@ -1147,8 +1148,10 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected, /// proton: starts. 
Add to parse MaterializedViewQuery ParserCreateMaterializedViewQuery streaming_view_p; + ParserCreateExternalTableQuery external_table_p; return table_p.parse(pos, node, expected) + || external_table_p.parse(pos, node, expected) || database_p.parse(pos, node, expected) || view_p.parse(pos, node, expected) || dictionary_p.parse(pos, node, expected) diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 485b2874103..2a0617371df 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -27,7 +27,6 @@ #include /// proton : starts -#include #include #include #include @@ -67,7 +66,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec /// proton: starts ParserShowFormatSchemasQuery show_format_schemas_p; ParserShowCreateFormatSchemaQuery show_create_format_schema_p; - ParserCreateExternalTableQuery create_external_table_p; /// proton: ends ASTPtr query; @@ -87,9 +85,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || describe_cache_p.parse(pos, query, expected) || describe_table_p.parse(pos, query, expected) || show_processlist_p.parse(pos, query, expected) - /// proton: starts - || create_external_table_p.parse(pos, query, expected) - /// proton: ends || create_p.parse(pos, query, expected) || alter_p.parse(pos, query, expected) || rename_p.parse(pos, query, expected) diff --git a/src/Storages/ExternalTable/CMakeLists.txt b/src/Storages/ExternalTable/CMakeLists.txt index 4558bbb4e93..bd39e1ba864 100644 --- a/src/Storages/ExternalTable/CMakeLists.txt +++ b/src/Storages/ExternalTable/CMakeLists.txt @@ -1,7 +1,6 @@ include("${proton_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(external_table .) 
-add_headers_and_sources(external_table ClickHouse) add_library(external_table ${external_table_headers} ${external_table_sources}) diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp b/src/Storages/ExternalTable/ClickHouse.cpp similarity index 63% rename from src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp rename to src/Storages/ExternalTable/ClickHouse.cpp index dab21acc242..9dc8b43ecdf 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.cpp +++ b/src/Storages/ExternalTable/ClickHouse.cpp @@ -1,10 +1,10 @@ -#include +#include #include -#include -#include -#include +#include +#include +#include +#include #include -#include "Storages/ExternalTable/ExternalTableFactory.h" namespace DB { @@ -13,7 +13,13 @@ namespace ExternalTable { ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings) - : database(settings->database.value) + : timeouts( /// TODO do not hard-code it, allow customization via settings + /*connection_timeout_=*/ 1 * 60 * 1'000'000, + /*send_timeout_=*/ 1 * 60 * 1'000'000, + /*receive_timeout_=*/ 1 * 60 * 1'000'000, + /*tcp_keep_alive_timeout_=*/ 5 * 60 * 1'000'000 + ) + , database(settings->database.value) , table(settings->table.changed ? settings->table.value : name) , logger(&Poco::Logger::get("ExternalTable-ClickHouse-" + table)) { @@ -28,19 +34,20 @@ ClickHouse::ClickHouse(const String & name, ExternalTableSettingsPtr settings) if (!port) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid port in ClickHouse address"); - connection_params.host = host; - connection_params.port = port; - connection_params.user = settings->user.value; - connection_params.password = settings->password.value; - connection_params.default_database = settings->database.value; - connection_params.security = settings->secure.value ? 
Protocol::Secure::Enable : Protocol::Secure::Disable; - /// TODO read from settings - connection_params.timeouts = { - /*connection_timeout_=*/ 1 * 60 * 1'000'000, - /*send_timeout_=*/ 1 * 60 * 1'000'000, - /*receive_timeout_=*/ 1 * 60 * 1'000'000, - /*tcp_keep_alive_timeout_=*/ 5 * 60 * 1'000'000 - }; + pool = DB::ConnectionPoolFactory::instance().get( + /*max_connections=*/ 100, + /*host=*/ host, + /*port=*/ port, + /*default_database=*/ settings->database.value, + /*user=*/ settings->user.value, + /*password=*/ settings->password.value, + /*quota_key=*/ "", + /*cluster=*/ "", + /*cluster_secret=*/ "", + "TimeplusProton", + settings->compression.value ? Protocol::Compression::Enable : Protocol::Compression::Disable, + settings->secure.value ? Protocol::Secure::Enable : Protocol::Secure::Disable, + /*priority=*/ 0); } void ClickHouse::startup() @@ -53,27 +60,30 @@ Pipe ClickHouse::read( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & /*query_info*/, ContextPtr context, - QueryProcessingStage::Enum processed_stage, + QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, size_t /*num_streams*/) { auto header = storage_snapshot->getSampleBlockForColumns(column_names); - auto client = std::make_unique(connection_params, logger); - auto source = std::make_shared(database, table, std::move(header), std::move(client), processed_stage, context, logger); + auto client = std::make_unique(pool->get(timeouts), timeouts, logger); + auto source = std::make_shared(database, table, std::move(header), std::move(client), context); return Pipe(std::move(source)); } SinkToStoragePtr ClickHouse::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { - return std::make_shared(database, table, metadata_snapshot->getSampleBlock(), connection_params, context, logger); + auto client = std::make_unique(pool->get(timeouts), timeouts, logger); + return std::make_shared(database, table, 
metadata_snapshot->getSampleBlock(), std::move(client), context, logger); } ColumnsDescription ClickHouse::getTableStructure() { ColumnsDescription ret {}; - ClickHouseClient client {connection_params, logger}; - auto query = "DESCRIBE TABLE " + (database.empty() ? "" : backQuoteIfNeed(database) + ".") + backQuoteIfNeed(table); + DB::ClickHouse::Client client = {pool->get(timeouts), timeouts, logger}; + auto query = fmt::format("DESCRIBE TABLE {}{}", + database.empty() ? "" : backQuoteIfNeed(database) + ".", + backQuoteIfNeed(table)); /// This has to fail quickly otherwise it will block Proton from starting. client.executeQuery(query, /*query_id=*/"", /*fail_quick=*/true); LOG_INFO(logger, "Receiving table schema"); @@ -98,7 +108,7 @@ ColumnsDescription ClickHouse::getTableStructure() } { const auto & col = block->getByName("type"); - col_desc.type = factory.get(col.column->getDataAt(i).toString(), true); + col_desc.type = factory.get(col.column->getDataAt(i).toString(), /*compatible_with_clickhouse=*/true); } { const auto & col = block->getByName("comment"); diff --git a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h b/src/Storages/ExternalTable/ClickHouse.h similarity index 91% rename from src/Storages/ExternalTable/ClickHouse/ClickHouse.h rename to src/Storages/ExternalTable/ClickHouse.h index adfa798ac34..a87eb418a0a 100644 --- a/src/Storages/ExternalTable/ClickHouse/ClickHouse.h +++ b/src/Storages/ExternalTable/ClickHouse.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -32,7 +33,8 @@ class ClickHouse final : public IExternalTable SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; private: - ConnectionParameters connection_params; + ConnectionPoolPtr pool; + ConnectionTimeouts timeouts; String database; String table; diff --git a/src/Storages/ExternalTable/ExternalTableSettings.h b/src/Storages/ExternalTable/ExternalTableSettings.h index cc00f1b281b..e99cd972706 
100644 --- a/src/Storages/ExternalTable/ExternalTableSettings.h +++ b/src/Storages/ExternalTable/ExternalTableSettings.h @@ -13,7 +13,8 @@ class ASTStorage; M(String, address, "", "The address of the ClickHouse server to connect", 0) \ M(String, user, "default", "The user to be used to connect to the ClickHouse server", 0) \ M(String, password, "", "The password to be used to connect to the ClickHouse server", 0) \ - M(Bool, secure, false, "Indicates if it uses TLS connection", 0) \ + M(Bool, secure, false, "Indicates if it uses secure connection", 0) \ + M(Bool, compression, true, "Indicates if compression should be enabled", 0) \ M(String, database, "default", "The datababse to connect to", 0) \ M(String, table, "", "The ClickHouse table to which the external table is mapped", 0) diff --git a/src/Storages/ExternalTable/StorageExternalTable.cpp b/src/Storages/ExternalTable/StorageExternalTable.cpp index ecd5665d8bc..5776bc96be0 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.cpp +++ b/src/Storages/ExternalTable/StorageExternalTable.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include @@ -9,40 +9,49 @@ namespace DB { StorageExternalTable::StorageExternalTable( - std::unique_ptr settings, - const StorageFactory::Arguments & args) -: IStorage(args.table_id) -, WithContext(args.getContext()->getGlobalContext()) + const StorageID & table_id, + std::unique_ptr settings, + bool is_attach, + ContextPtr context_) +: IStorage(table_id) +, WithContext(context_) { - external_table = ExternalTableFactory::instance().getExternalTable(args.table_id.getTableName(), std::move(settings)); + external_table = ExternalTableFactory::instance().getExternalTable(table_id.getTableName(), std::move(settings)); - /// First, setStorageMetadata should be allowed to fail (the only failable part is getTableStructure function call), otherwise it will block Proton from starting up. 
- /// Second, when it fails, the exception should be caught, otherwise, Proton will fail to start. + /// Two situations: + /// * Create a new table. In this case, we want it fails the create query if it fails to fetch the columns description. So that users know that there is something with the connection and they can try again once the issue is resolved. + /// * Attach a table (Proton restarts). In this case, even it fails to fetch the columns description, we want to make sure that: + /// - it does not terminate Proton, otherwise Proton will never start again + /// - it does not block Proton from starting, otherwise Proton will get stuck + /// So, we let it keep retrying in the background, and hoping it will eventually succeeded (until the user drops the table). + /// /// TODO we could use cache to save the table structure, so that when Proton restarts it could read from the cache directly. try { - setStorageMetadata(args); + fetchColumnsDescription(); } catch (const Exception & e) { - LOG_ERROR(&Poco::Logger::get("ExternalTable-ClickHouse" + args.table_id.getFullTableName()), - "Failed to fetch table structure, error: {}. Will keep retrying in background", e.what()); + if (!is_attach) + e.rethrow(); + + LOG_ERROR(&Poco::Logger::get("ExternalTable"), + "Failed to fetch table structure for {}, error: {}. 
Will keep retrying in background", + getStorageID().getFullTableName(), + e.what()); background_jobs.scheduleOrThrowOnError([this](){ while (!is_dropped) { try { std::this_thread::sleep_for(std::chrono::seconds(5)); - auto metadata = getInMemoryMetadata(); - metadata.setColumns(external_table->getTableStructure()); - setInMemoryMetadata(metadata); - break; + fetchColumnsDescription(); + return; } catch (const Exception &) { } } }); } - } Pipe StorageExternalTable::read( @@ -65,13 +74,12 @@ SinkToStoragePtr StorageExternalTable::write( return external_table->write(query, metadata_snapshot, context_); } -void StorageExternalTable::setStorageMetadata(const StorageFactory::Arguments & args) +void StorageExternalTable::fetchColumnsDescription() { - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(external_table->getTableStructure()); - storage_metadata.setConstraints(args.constraints); - storage_metadata.setComment(args.comment); - setInMemoryMetadata(storage_metadata); + auto desc = external_table->getTableStructure(); + auto metadata = getInMemoryMetadata(); + metadata.setColumns(std::move(desc)); + setInMemoryMetadata(metadata); } void registerStorageExternalTable(StorageFactory & factory) @@ -84,7 +92,11 @@ void registerStorageExternalTable(StorageFactory & factory) auto settings = std::make_unique(); settings->loadFromQuery(*args.storage_def); - return StorageExternalTable::create(std::move(settings), args); + return StorageExternalTable::create( + args.table_id, + std::move(settings), + args.attach, + args.getContext()->getGlobalContext()); }; factory.registerStorage( diff --git a/src/Storages/ExternalTable/StorageExternalTable.h b/src/Storages/ExternalTable/StorageExternalTable.h index f5c8d1e86a3..c5d86689484 100644 --- a/src/Storages/ExternalTable/StorageExternalTable.h +++ b/src/Storages/ExternalTable/StorageExternalTable.h @@ -40,10 +40,14 @@ class StorageExternalTable final : public shared_ptr_helper settings, const 
StorageFactory::Arguments & args); + StorageExternalTable( + const StorageID & table_id, + std::unique_ptr settings, + bool is_attach, + ContextPtr context_); private: - void setStorageMetadata(const StorageFactory::Arguments & args); + void fetchColumnsDescription(); IExternalTablePtr external_table; ThreadPool background_jobs {1};