From f03b023f1a2cf48569499c7ad4222d15094efbb8 Mon Sep 17 00:00:00 2001 From: Dominik Lohmann Date: Sat, 7 Oct 2023 20:15:45 +0200 Subject: [PATCH] Remove support for models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Models were fundamentally broken in query evaluation, and no one noticed—which is usually a sign that a feature can be removed without much user friction. We already have plans to bring the feature back through more powerful expressions in TQL. --- .../changes/3552--disable-dense-indexes.md | 5 + libtenzir/builtins/operators/where.cpp | 2 +- libtenzir/include/tenzir/taxonomies.hpp | 59 +------ libtenzir/src/catalog.cpp | 7 - libtenzir/src/module.cpp | 13 +- libtenzir/src/taxonomies.cpp | 150 +----------------- schema/taxonomy/tenzir/network.yaml | 9 -- .../reference/taxonomy-queries/step_03.ref | 1 - .../reference/taxonomy-queries/step_04.ref | 1 - .../reference/taxonomy-queries/step_05.ref | 1 - .../reference/taxonomy-queries/step_06.ref | 1 - .../reference/taxonomy-queries/step_07.ref | 1 - .../reference/taxonomy-queries/step_08.ref | 1 - tenzir/integration/tests.yaml | 8 - .../model-composition.excalidraw.svg | 16 -- web/docs/data-model/taxonomies.md | 81 ++-------- 16 files changed, 22 insertions(+), 334 deletions(-) delete mode 100644 tenzir/integration/reference/taxonomy-queries/step_03.ref delete mode 100644 tenzir/integration/reference/taxonomy-queries/step_04.ref delete mode 100644 tenzir/integration/reference/taxonomy-queries/step_05.ref delete mode 100644 tenzir/integration/reference/taxonomy-queries/step_06.ref delete mode 100644 tenzir/integration/reference/taxonomy-queries/step_07.ref delete mode 100644 tenzir/integration/reference/taxonomy-queries/step_08.ref delete mode 100644 web/docs/data-model/model-composition.excalidraw.svg diff --git a/changelog/next/changes/3552--disable-dense-indexes.md b/changelog/next/changes/3552--disable-dense-indexes.md index 9db35b22fd6..709d5aff092 100644 --- 
a/changelog/next/changes/3552--disable-dense-indexes.md +++ b/changelog/next/changes/3552--disable-dense-indexes.md @@ -2,3 +2,8 @@ Tenzir no longer builds dense indexes for imported events. Dense indexes improved query performance at the cost of a higher memory usage. However, over time the performance improvement became smaller due to other improvements in the underlying storage engine. + +Tenzir no longer supports models in taxonomies. Since Tenzir v4.0 they were only +supported in the deprecated `tenzir-ctl export` and `tenzir-ctl count` commands. +We plan to bring the functionality back in the future with more powerful +expressions in TQL. diff --git a/libtenzir/builtins/operators/where.cpp b/libtenzir/builtins/operators/where.cpp index 674a5936c65..80003b53678 100644 --- a/libtenzir/builtins/operators/where.cpp +++ b/libtenzir/builtins/operators/where.cpp @@ -67,7 +67,7 @@ class where_operator final auto initialize(const type& schema, operator_control_plane& ctrl) const -> caf::expected override { - auto ts = taxonomies{.concepts = ctrl.concepts(), .models = {}}; + auto ts = taxonomies{.concepts = ctrl.concepts()}; auto resolved_expr = resolve(ts, expr_.inner, schema); if (not resolved_expr) { diagnostic::warning("{}", resolved_expr.error()) diff --git a/libtenzir/include/tenzir/taxonomies.hpp b/libtenzir/include/tenzir/taxonomies.hpp index 9291de2858f..acd4f7195a6 100644 --- a/libtenzir/include/tenzir/taxonomies.hpp +++ b/libtenzir/include/tenzir/taxonomies.hpp @@ -62,54 +62,17 @@ using concepts_map = detail::stable_map; /// to a `concepts_map`. extern const type concepts_data_schema; -/// The definition of a model. -struct model { - /// The description of the model. - std::string description; - - /// The ordered concepts and models that the model is composed of. - /// If an entry is another model, its concepts must also be represented for - /// a schema to be considered. 
- std::vector definition; - - friend bool operator==(const model& lhs, const model& rhs); - - template - friend auto inspect(Inspector& f, model& m) { - return f.object(m).pretty_name("model").fields( - f.field("description", m.description), - f.field("definition", m.definition)); - } - - inline static const record_type& schema() noexcept { - static const auto result = record_type{ - {"description", string_type{}}, - {"definition", list_type{string_type{}}}, - }; - return result; - } -}; - -/// Maps model names to their definitions. -using models_map = detail::stable_map; - -/// Describes the schema of a tenzir::list of models for automatic conversion to -/// a `models_map`. -extern const type models_data_schema; - /// A taxonomy is a combination of concepts and models. Tenzir stores all /// configured taxonomies in memory together, hence the plural naming. struct taxonomies { concepts_map concepts; - models_map models; - friend bool operator==(const taxonomies& lhs, const taxonomies& rhs); template friend auto inspect(Inspector& f, taxonomies& t) { return f.object(t) .pretty_name("taxonomies") - .fields(f.field("concepts", t.concepts), f.field("models", t.models)); + .fields(f.field("concepts", t.concepts)); } }; @@ -123,8 +86,8 @@ std::vector resolve_concepts(const concepts_map& concepts, std::vector fields_or_concepts); -/// Substitutes concept and model identifiers in field extractors with -/// replacement expressions containing only concrete field names. +/// Substitutes concept identifiers in field extractors with replacement +/// expressions containing only concrete field names. /// @param t The set of taxonomies to apply. /// @param e The original expression. /// @param schema An optional schema to restrict taxonomy resolution by. 
@@ -136,22 +99,6 @@ resolve(const taxonomies& t, const expression& e, const type& schema = {}); namespace fmt { -template <> -struct formatter { - template - constexpr auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { - return ctx.begin(); - } - - template - auto format(const tenzir::model& value, FormatContext& ctx) - -> decltype(ctx.out()) { - return fmt::format_to(ctx.out(), - "model {{description: {}, definition: [{}]}}", - value.description, fmt::join(value.definition, ", ")); - } -}; - template <> struct formatter { template diff --git a/libtenzir/src/catalog.cpp b/libtenzir/src/catalog.cpp index 7d39cf57275..29dff30eed5 100644 --- a/libtenzir/src/catalog.cpp +++ b/libtenzir/src/catalog.cpp @@ -625,14 +625,7 @@ catalog(catalog_actor::stateful_pointer self, self->quit(std::move(err)); return catalog_actor::behavior_type::make_empty_behavior(); } - auto taxonomies = load_taxonomies(self->system().config()); - if (!taxonomies) { - self->quit(std::move(taxonomies.error())); - return catalog_actor::behavior_type::make_empty_behavior(); - } self->state.taxonomies.concepts = modules::concepts(); - // TODO: Taxonomy models are to be removed soon. - self->state.taxonomies.models = std::move(taxonomies->models); // Load loaded schema types from the singleton. // TODO: Move to the load handler and re-parse the files. 
TENZIR_DIAGNOSTIC_PUSH diff --git a/libtenzir/src/module.cpp b/libtenzir/src/module.cpp index 72b7ff8d2cb..ecbd5000932 100644 --- a/libtenzir/src/module.cpp +++ b/libtenzir/src/module.cpp @@ -238,7 +238,6 @@ auto load_taxonomies(const caf::actor_system_config& cfg) std::error_code err{}; auto dirs = get_module_dirs(cfg); concepts_map concepts; - models_map models; for (const auto& dir : dirs) { TENZIR_DEBUG("loading taxonomies from {}", dir); const auto dir_exists = std::filesystem::exists(dir, err); @@ -258,19 +257,9 @@ auto load_taxonomies(const caf::actor_system_config& cfg) for (auto& [name, definition] : concepts) TENZIR_DEBUG("extracted concept {} with {} fields", name, definition.fields.size()); - if (auto err = convert(yaml, models, models_data_schema)) - return caf::make_error(ec::parse_error, - "failed to extract models from file", - file.string(), err.context()); - for (auto& [name, definition] : models) { - TENZIR_DEBUG("extracted model {} with {} fields", name, - definition.definition.size()); - TENZIR_TRACE("uses model mapping {} -> {}", name, - definition.definition); - } } } - return tenzir::taxonomies{std::move(concepts), std::move(models)}; + return tenzir::taxonomies{std::move(concepts)}; } } // namespace tenzir diff --git a/libtenzir/src/taxonomies.cpp b/libtenzir/src/taxonomies.cpp index 92fc89d7f5b..7673db9112e 100644 --- a/libtenzir/src/taxonomies.cpp +++ b/libtenzir/src/taxonomies.cpp @@ -57,19 +57,8 @@ const type concepts_data_schema = type{map_type{ }, }}; -bool operator==(const model& lhs, const model& rhs) { - return lhs.definition == rhs.definition; -} - -const type models_data_schema = type{map_type{ - type{string_type{}, {{"key", "model.name"}}}, - record_type{ - {"model", model::schema()}, - }, -}}; - bool operator==(const taxonomies& lhs, const taxonomies& rhs) { - return lhs.concepts == rhs.concepts && lhs.models == rhs.models; + return lhs.concepts == rhs.concepts; } std::vector @@ -170,142 +159,9 @@ resolve(const taxonomies& ts, 
const expression& e, const type& schema) { return expression{d}; } }; - auto resolve_models - = [&](const std::string& field_name, relational_operator op, - const tenzir::data& data, - auto make_predicate) -> caf::expected { - auto r = caf::get_if(&data); - if (!r) - // Models can only be compared to records, so if the data side is - // not a record, we move to the concept substitution phase directly. - return resolve_concepts(field_name, op, data, make_predicate); - if (r->empty()) - return expression{caf::none}; - auto it = ts.models.find(field_name); - if (it == ts.models.end()) - return resolve_concepts(field_name, op, data, make_predicate); - // We have a model predicate. - // ========================== - // The model definition forms a tree that contains models as non-leaf - // nodes and concepts as leafs. For model substition we need to iterate - // over the leafs in the order of definition, which is left to right. - // The levels stack is used to keep track of the current position at - // each level of the tree. - auto level_1 = std::pair{it->second.definition.begin(), - it->second.definition.end()}; - auto levels = std::stack{std::vector{std::move(level_1)}}; - auto descend = [&] { - for (auto child_component = ts.models.find(*levels.top().first); - child_component != ts.models.end(); - child_component = ts.models.find(*levels.top().first)) { - auto& child_def = child_component->second.definition; - levels.emplace(child_def.begin(), child_def.end()); - } - }; - // Move the cursor to the leftmost leaf in the tree. - descend(); - auto next_leaf = [&] { - // Update the levels stack; explicit scope for clarity. - while (!levels.empty() && ++levels.top().first == levels.top().second) - levels.pop(); - if (!levels.empty()) { - descend(); - // Empty models ought to be rejected at load time. - TENZIR_ASSERT(levels.top().first != levels.top().second); - } - }; - // The conjunction for all model concepts that are restriced by a value - // in rec. 
- conjunction restricted; - // The conjunction for all model concepts that aren't specified in rec. - conjunction unrestricted; - auto abs_op = is_negated(op) ? negate(op) : op; - auto insert_meta_field_predicate = [&] { - auto make_meta_field_predicate = - [&]([[maybe_unused]] relational_operator op, const tenzir::data&) { - return [](std::string item) { - return predicate{field_extractor{std::move(item)}, - relational_operator::not_equal, - tenzir::data{}}; - }; - }; - unrestricted.emplace_back( - resolve_concepts(*levels.top().first, relational_operator::equal, - caf::none, make_meta_field_predicate)); - }; - auto named = !r->begin()->first.empty(); - if (named) { - // TODO: Nested records of the form - // , process_filename: "svchost.exe"> - // are currently not supported. - for (; !levels.empty(); next_leaf()) { - // TODO: Use `ends_with` for better ergonomics. - // TODO: Remove matched entries and check mismatched concepts. - auto concept_field = r->find(*levels.top().first); - if (concept_field == r->end()) - insert_meta_field_predicate(); - else - restricted.emplace_back( - resolve_concepts(*levels.top().first, abs_op, - concept_field->second, make_predicate)); - } - } else { - auto value_iterator = r->begin(); - for (; !levels.empty(); next_leaf(), ++value_iterator) { - if (value_iterator == r->end()) - // The provided record is shorter than the matched concept. - // TODO: This error could be rendered in a way that makes it - // clear how the mismatch happened. 
For example: - // src_ip, src_port, dst_ip, dst_port, proto - // < _, _, 1.2.3.4, _> - // ^~~~~ - // not enough fields provided - return caf::make_error(ec::invalid_query, *r, - "doesn't match the model:", it->first); - if (caf::holds_alternative(value_iterator->second)) - insert_meta_field_predicate(); - else - restricted.emplace_back( - resolve_concepts(*levels.top().first, abs_op, - value_iterator->second, make_predicate)); - } - if (value_iterator != r->end()) { - // The provided record is longer than the matched concept. - // TODO: This error could be rendered in a way that makes it - // clear how the mismatch happened. For example: - // src_ip, src_port, dst_ip, dst_port, proto - // < _, _, 1.2.3.4, _, _, "tcp"> - // ^~~~~ - // too many fields - // provided - return caf::make_error(ec::invalid_query, *r, - "doesn't match the model:", it->first); - } - } - expression expr; - switch (restricted.size()) { - case 0: { - return unrestricted; - } - case 1: { - expr = restricted[0]; - break; - } - default: { - expr = expression{std::move(restricted)}; - break; - } - } - if (is_negated(op)) - expr = negation{std::move(expr)}; - if (unrestricted.empty()) - return expr; - unrestricted.push_back(expr); - return unrestricted; - }; if (auto data = caf::get_if(&pred.rhs)) { if (auto fe = caf::get_if(&pred.lhs)) { - return resolve_models( + return resolve_concepts( fe->field, pred.op, *data, [&](relational_operator op, const tenzir::data& o) { return [&, op](const std::string& item) { @@ -316,7 +172,7 @@ resolve(const taxonomies& ts, const expression& e, const type& schema) { } if (auto data = caf::get_if(&pred.lhs)) { if (auto fe = caf::get_if(&pred.rhs)) { - return resolve_models( + return resolve_concepts( fe->field, pred.op, *data, [&](relational_operator op, const tenzir::data& o) { return [&, op](const std::string& item) { diff --git a/schema/taxonomy/tenzir/network.yaml b/schema/taxonomy/tenzir/network.yaml index 164994bf80c..e0e6f365c57 100644 --- 
a/schema/taxonomy/tenzir/network.yaml +++ b/schema/taxonomy/tenzir/network.yaml @@ -96,12 +96,3 @@ concepts: - net.outer_vlan - net.inner_vlan - -- model: - name: net.connection - definition: - - net.src.ip - - net.src.port - - net.dst.ip - - net.dst.port - - net.proto diff --git a/tenzir/integration/reference/taxonomy-queries/step_03.ref b/tenzir/integration/reference/taxonomy-queries/step_03.ref deleted file mode 100644 index 4c5c8078521..00000000000 --- a/tenzir/integration/reference/taxonomy-queries/step_03.ref +++ /dev/null @@ -1 +0,0 @@ -158 diff --git a/tenzir/integration/reference/taxonomy-queries/step_04.ref b/tenzir/integration/reference/taxonomy-queries/step_04.ref deleted file mode 100644 index b8639573348..00000000000 --- a/tenzir/integration/reference/taxonomy-queries/step_04.ref +++ /dev/null @@ -1 +0,0 @@ -476 diff --git a/tenzir/integration/reference/taxonomy-queries/step_05.ref b/tenzir/integration/reference/taxonomy-queries/step_05.ref deleted file mode 100644 index 9559cd9241e..00000000000 --- a/tenzir/integration/reference/taxonomy-queries/step_05.ref +++ /dev/null @@ -1 +0,0 @@ -730 diff --git a/tenzir/integration/reference/taxonomy-queries/step_06.ref b/tenzir/integration/reference/taxonomy-queries/step_06.ref deleted file mode 100644 index e944bea9ac3..00000000000 --- a/tenzir/integration/reference/taxonomy-queries/step_06.ref +++ /dev/null @@ -1 +0,0 @@ -1206 diff --git a/tenzir/integration/reference/taxonomy-queries/step_07.ref b/tenzir/integration/reference/taxonomy-queries/step_07.ref deleted file mode 100644 index b8639573348..00000000000 --- a/tenzir/integration/reference/taxonomy-queries/step_07.ref +++ /dev/null @@ -1 +0,0 @@ -476 diff --git a/tenzir/integration/reference/taxonomy-queries/step_08.ref b/tenzir/integration/reference/taxonomy-queries/step_08.ref deleted file mode 100644 index 9559cd9241e..00000000000 --- a/tenzir/integration/reference/taxonomy-queries/step_08.ref +++ /dev/null @@ -1 +0,0 @@ -730 diff --git 
a/tenzir/integration/tests.yaml b/tenzir/integration/tests.yaml index 7bf27e15a47..42de70d132a 100644 --- a/tenzir/integration/tests.yaml +++ b/tenzir/integration/tests.yaml @@ -329,14 +329,6 @@ tests: - command: import -b suricata input: data/pcap/suricata/eve.json.gz - command: count "net.src.ip == 192.168.168.100" - - command: count "net.connection == <192.168.168.100, _, 72.247.178.18, _, _>" - # We omit the whitespace after the colon on purpose, otherwise pyyaml - # thinks this is a key-value pair. - - command: count 'net.connection == ' - - command: count 'net.connection != ' - - command: count "net.connection == <_, _, _, _, _>" - - command: count "net.connection == <_, _, _, 80, _>" - - command: count "net.connection != <_, _, _, 80, _>" Arrow Full Data Model: tags: [export, arrow] diff --git a/web/docs/data-model/model-composition.excalidraw.svg b/web/docs/data-model/model-composition.excalidraw.svg deleted file mode 100644 index 721e412837c..00000000000 --- a/web/docs/data-model/model-composition.excalidraw.svg +++ /dev/null @@ -1,16 +0,0 @@ - - - 
eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nO1cXNl2m8hcdTAwMTZ991do+b5GdM1D3iw7XHUwMDFl0k7sWI6T+N5eWlhgXHRcdTAwMGKBXGbIU6/8e1x1MDAxZvBcdTAwMDBCXHUwMDAyIWNb6uTqIZaKoiiqzt77XGaQv9dcdTAwMWGN9eh2ZK+/b6zbN13TdazAvF5/XHUwMDE3t1/ZQej4XHUwMDFlXHUwMDFjXCLJ79BcdTAwMWZcdTAwMDfdpGc/ikbh+z/+MEcjIz3L6PrD+zNt11x1MDAxZdpeXHUwMDE0Qt//wu9G4+/k38y1XHUwMDAyu1x1MDAxYplez7WTXHUwMDEzkkPp5bBQ+dbPvpdcXFpQzKlUKu3ghFtwuci24Oi56YZ2eiRuWkf4YjTCXHUwMDAxXHUwMDE2w10rkv3+TWd/ezu96rnjuu3o1k1mXHUwMDE1+nAn6bEwXG78gf3NsaJ+PKtcXHvRWYE/7vU9O1xmJ87xR2bXiW7jNoSeWu+X4H0jbbmJe5D0d3xcdTAwMDbVKHftTd/1g/ja/0HJJ736mdlcdTAwMWT0YFxunpX2wdw0z87TPtePdyTScfu20+tH8VxuZ65lJ6tKkeJcXDL61Fx1MDAxZY8/2rOS7f0re9+e9XDfj/ud7jh+aPmZzjTu/yFvKVlryezi5f5ccrtcdTAwMWLKy+Bmb799dtRcdTAwMTlcdTAwMWSNPPl0R1x1MDAxM6ZcdTAwMTXZN9H604Gf78qG9S9PPlx1MDAwNGdcdTAwMWZGW9FcdTAwMTe3ed25+jS2XHUwMDA232dcdTAwMGZrXHUwMDA2gX9dddyz5sFd+7M8NU83L3TkXHUwMDFkfOJDXHUwMDE1Vlx1MDAxYvfhW7qu45Fl3ps3XHUwMDE2Ulx1MDAxMKG41IKlu+Q63lx1MDAwMFx1MDAwZXpj103b/O4gRcRaZsJTSEyWayZcYnFcdTAwMTFcYlx1MDAxNYOJcC10ZVx1MDAxMJZv34qCkE+AkOF6IOzaXHUwMDE2s8xcdTAwMTkg5DNASKZAiFx0xVxmXHUwMDExonQtXHUwMDE4kvkwnG15XHUwMDFjLI9pLqiiXHUwMDBiWF66w75cdTAwMTe1nbuEXHTEROu2OXTchOWemlx1MDAxM5N8/yg4XHUwMDFk27NGvuNF2fVccm24eG60+LRcctfpxda73oXbsYNcdMOOXHUwMDFjUKqnXHUwMDBlQ8eysvLThbmYMGawV0U2/MDpOZ7pXHUwMDFlXHUwMDE3TLVcdTAwMTRypeLHXHUwMDBiYYc5RkxKtID4bVxcnjun/bPjXHUwMDFmftjf+dZcbrzb8cnhauOOipz4sXq4U0LZWszAnaokfpxcbqIoUbhcdTAwMTbsaqrfx8HwU3Ry7dNPvfDDgG/1Lo93m0tVvyoqxVx1MDAwNX91lcpoUF6kKOKYMEorg6V8lVdcdTAwMTQsk1wilVx1MDAwNc+LipSsJlJYXHUwMDEwKlx1MDAxOOa/n0g5ozeVpznEXiBPMMlSlN2jfJYziDEtXHUwMDAyXHUwMDFhODBEUnBNeGWklVx1MDAxM89KXCKNKGYwTTDWXHUwMDE0IXDGWD5CM6RcdTAwMTRSc0VcdTAwMTkjXFy8XG5cblx1MDAwNTdcdTAwMDSgi1wiobVcdTAwMDJcdTAwMGZcXE1jXHUwMDEyK4Myrlx1MDAwMVx1MDAwM1xuY3BZXHUwMDExn8IoQoxcdTAwMDCeOV9cdTAwMWVGgZhcdTAwMTXH+jlcdTAwMThcciMziFqOZzleb3JiXHUwMDBmSYcq7luC6u44niUyYLdcdTAwMTA4XHUwMDE5Wlx0XHRzUpk+PXNcdTAwMTQ7/pP7Pn3j4PTNn1A5YJ8uplxyxajkMyeKkURcdTAwMDTmXHUwMDAw21x1
MDAwN05cdOXTM3HNMNr0h0MngtU+TFx1MDAxY9HcqibLt1x1MDAxMcO8b5tW/ijcSfbYelx1MDAxNDg5PzXxbif9lfRbI8VM8uPp+1/vZvYm0pCIwsrinD3HnybmhpZcdTAwMDQjRe8hReS88Vxu8XE/3lx1MDAxNDTS8dayf1x1MDAxN2dHolx1MDAwYtlcdTAwMTGiZMaJ1inrzCPH8tzBipIjNyA6XHUwMDE1gFwirDlcdTAwMTIp/SWRM6NcdTAwMDZcIkwrXHUwMDA17lx1MDAwMdf4lciRXHUwMDE5WMFUhVx1MDAxMkhw8Fx1MDAwM2aQI1x1MDAwNZJmYHFAfoBcIpFcdKRcdTAwMWXJUTOmwIBIvWRXXXJcdTAwMTRcIpN8WVwiOTaBXHUwMDFkKYKN1VQqJFx1MDAxNeacZbrdU1Zu65F+XHUwMDFlQXbaTfU5wL1gINBl95JHflx1MDAxMNBpgsTIXHUwMDEwUlx1MDAwMflcdTAwMTHgaoYkk7PnTYE3OMeUx9OHuf1cdTAwMDJcXKlcdTAwMTVcdTAwMDIqgbiGaUp19mxMXGbOJWJw02Dfiko2lyqL0JJcZjeFk1x1MDAwNZmyNMGRXHRR8iGbZEKDXHUwMDBmRSpzZbnRrCRX5vNcdTAwMWKMLDW/gSlcdTAwMDPSXHUwMDEzTCw1vd88kTvaZXdcdTAwMDN8XHUwMDExXHUwMDFldG6t1vHF6Pa3Te+/ReJEsVwiXHUwMDE0XHUwMDEyXHUwMDE0XHUwMDE3XHUwMDE4IJSvnt4v375cdTAwMTWFYS69XzPNWDNzwlx1MDAxMNJcXIJ3vzy/4z5xwlx1MDAxNjC8dINrJE5GfvC2mf05mlGQOkmm+XzRy7JxPn/CwElBiiyQ1o/8y5v+Uef6/IdcdTAwMTJB67vb1Kd8uNp4y9e0Oa+Ht5o1bVh0XHUwMDAyYVx1MDAxYlx1MDAxMfWyIHWr2t///Li5/+P7XvduvFx1MDAxNVxyXHUwMDFkdLtjfakve8NbdDRgh1x1MDAxZi9C2XVEd/PSubpxX0D2RNs/2VxmwmPX77Q/7lx1MDAwZfZQ78Pm8OVk702q2kpcdTAwMTbhUFx0RpTUpHqkXr59K1xuw0nZ469VMJhV1WZTMFx1MDAxNFwi1lx1MDAxY5RcdTAwMDZxv77sWXZcdTAwMTiBvkRgeZ3/eaW1bfpKXG44Rz7yXG5YMOPni2GGdKcwqCB6XHUwMDE1meTNXFwpVK3PTfdr53C0//X2+M+NLbPjtFdcdTAwMWKD+VxikKOlRoBcdTAwMTTMn3Aqllrh3nD7et86XHUwMDFjqMFW+Fx1MDAwM4dur4Nuvi5VXHSrKNZbXHUwMDA0arowUONUw1x1MDAxNlx1MDAwYlU9Titf5Vx1MDAxNVx1MDAwNUtOsGqmS2rGaZpIXCKZ+k316o2L3HO4vUynnl/pZohcdTAwMTRcdTAwMDFcdTAwMGUrjZRcdTAwMDbIVa90l1x1MDAxM9BKXCIuLuZwqrRm91x1MDAxNc9cdTAwMWP+ODGYQHCcq7gsJ19cdTAwMDWNglx1MDAxOVx1MDAwMkvCeXGlm1x1MDAxMFx1MDAwMzxHhFx1MDAxOedcdTAwMTRwoTKYfHQuIbxDhLOlXHUwMDE2umn8eMRzwFqxllPuyzWy9WPKkeJCIYlcdTAwMTFLn2drpHWc7K4zKaduvFJcdTAwMWSnXHUwMDFjtI3JQjfTXHUwMDBmn/Tp8okpXHUwMDEzXHUwMDFkXHUwMDA3hSh+WEBcIo7F1JT+bVVcdTAwMWNhxFx1MDAxMVx1MDAwZlazK95cdTAwMTKO8rjIr6pWvItwkow3XHKRdLy17N/FWZKjQlx1MDAxZlx1MDAxZVNKiSaUpoCcx5LliYWVZcnikjfwjlx1MDAwMUq9XHUwMDAyJW/ownRaykv1/fH5Vk1cdTAwMTCWYJJLJUmiVbpESyTJuODN
NOVMx8/8MsQlI5luL1rw3vyzJ77sXHUwMDFlj083zMOBv2uF+1x1MDAxYl9mPFx1MDAxMfT/gvfsgrem5OVcbt55lCzIk6XZXHUwMDBlWexPMs4klniBgne50awkU06lO9RS01x1MDAxZFx1MDAxMsecjJab+Fx1MDAwZq6OOlx1MDAwN+gq6m9FXHUwMDAy9Ta+nVx1MDAxZpDt8W+b+H+LNFxuzry+lYchaDknXHUwMDFjyeqvXG6U79+KwnAykFx1MDAxMzWzjnVfXHUwMDE1kOBcdTAwMDKD0uqlvyqwnFTKm5e95yhHWTKlZu2bXHUwMDE0Q1x1MDAwZmBcdTAwMDfuZ4ak576js+Py1j4+3/1yd3DVvkHfWp9uP6428JqYTVxuIJP1kFe39FxywT1VKvuOzFx1MDAxMlx1MDAxNFB19aC7d4BbI5t91eN2U3rIqq+AK/jEV+m4x+HRXsvvOidhoPc9kNX97e9cdTAwMDWOwELjmjt3YWdz2+9d7Fx1MDAxY1x1MDAwNqco4q2m3X45xX6LUn1cdG0wXGJxIE6rnoYtt7ZcdTAwMTWljVxc5pXV/F8g6r6Ajlx1MDAxNKJAXHUwMDFhaqnpVERcdTAwMDVZxPLqXHQ2SKhcdTAwMDfKXHUwMDE22+NbSvVcdTAwMWORy0t1ZpalQCt5f4VcdTAwMTZDXHIrQqSS1Z+KKaeelcRcdTAwMWFRXHUwMDA2koxgJXOQXHUwMDEz0lx1MDAxMJJLlX1s5iWxJ7GhSW789HGZWVx1MDAxN79cdTAwMDcjQVJpXGY8vNT3VLAm4jVf4ivHQSNbKOCKU4KU4FpRKYnIdLpPo6U7PHXPXHUwMDEz2brJ6f+7UmZ02priT5MqQyBNJZ83wFxmc0xcdTAwMDbIWGI6xFr27zPqrLTw1X0qk1eOdPV4vNyNWk3OXHUwMDExXHUwMDA2UVOP41FiUM6ZyDLRi1x1MDAxMlx1MDAwZTXkXHUwMDA0zU2wXHJjnM5gXHUwMDFizFVcXDlcdTAwMDTAL5VtpEDPitVfmm2aQDdcdTAwMTJRXHUwMDA0u4RcdGNaT7PNw97+wlSTs6P4w7CB4tiyXHUwMDAyz8w6O7W/XCKOWXtcdTAwMThw3Vx1MDAxY43aXHUwMDExmMfTbq5fOfZ1a1x1MDAwNlx1MDAwZc6TT+xcdTAwMTclm1x1MDAxMDOBnVjnz7Wf/1x1MDAwMFx1MDAxNa9cYlx0In0= - - - - source_endpointsource_ipsource_portdestination_endpointdestination_ipdestination_portconnection \ No newline at end of file diff --git a/web/docs/data-model/taxonomies.md b/web/docs/data-model/taxonomies.md index b53cf5bc175..35fe2006979 100644 --- a/web/docs/data-model/taxonomies.md +++ b/web/docs/data-model/taxonomies.md @@ -39,17 +39,17 @@ schema][ocsf-schema]. We could add [yet another data model](https://xkcd.com/927/), but our goal is that you pick one that you know already or like best. We envision a thriving community around taxonomization, as exemplified with the [OCSF][ocsf]. With -Tenzir, we aim for leveraging the taxonomy of your choice. 
There are currently -two mechanisms for this purpose: +Tenzir, we aim to leverage the taxonomy of your choice. -- [Concept](#concepts): a field mapping/alias that lazily resolves at query time -- [Model](#models): a set of concepts that in sum describe a specific entity +:::info Concepts +A [concept](#concepts) is a field mapping/alias that lazily resolves at query +time. +::: -Concepts and models are not embedded in the schema and can therefore evolve -independently from the data typing. This behavior is different from other -systems that normalize by *rewriting* the data on ingest, e.g., elastic with -[ECS][ecs]. We do not advocate for this approach, because it has the following -drawbacks: +Concepts are not embedded in the schema and can therefore evolve independently +from the data typing. This behavior is different from other systems that +normalize by *rewriting* the data on ingest, e.g., Elastic with [ECS][ecs]. We +do not advocate for this approach, because it has the following drawbacks: - **Data Lock-in**: if you want to use a different data model tomorrow, you would have to rewrite all your past data, which can be infeasible in some @@ -147,66 +147,3 @@ concepts: You can add new mappings to an existing concept in every module. For example, when adding a new data source that contains an event with a source IP address field, you can define the concept in the corresponding module. - -## Models - -A *model* is made of one or more concepts. An event fulfills a model -if and only if it fulfills all contained concepts. - -Consider again Sysmon and Suricata data for formalizing the notion of a -`connection` that requires the following concepts to be fulfilled: `source_ip`, -`source_port`, `dest_ip`, and `dest_port`. Both `sysmon.NetworkConnection` and -`suricata.flow` fulfil all concepts of the model `connection`. 
The model -definition looks as follows: - -```yaml -models: - connection: - description: a network connection 4-tuple - definition: - - source_ip - - source_port - - destination_ip - - destination_port -``` - -Models compose like concepts: you can define a new model out of existing models -or out of a mix of concepts and models. However, a concept cannot include a -model. - -In the above example, the `connection` model consists of the `source_endpoint` -and `destination_endpoint` model, each of which contains two concepts: - -![Model Composition](model-composition.excalidraw.svg) - -You can query a model by providing a record literal: - -```c -connection = <_, _, 10.0.0.1, 80> -``` - -The query expression resolution begins with models, continues with concepts, and -terminates when the query consists of extractors only. For example, consider the -model query `destination_endpoint = <10.0.0.1, 80>` where the left-hand side -being the name of a model and the right-hand side a record value. Tenzir resolves -this query into a conjunction first: - -``` -destination_ip == 10.0.0.1 && destination_port == 80 -``` - -Thereafter, the concept resolution takes place again, assuming that there exist -concept definitions for `destination_port` symmetric to `destination_ip`: - -```c -(sysmon.NetworkConnection.RemoteIp == 10.0.0.1 - || suricata.flow.dest_ip == 10.0.0.1 - || zeek.conn.id.resp_h == 10.0.0.1) -&& -(sysmon.NetworkConnection.RemotePort == 80 - || suricata.flow.dest_port == 80 - || zeek.conn.id.resp_p == 80) -``` - -The resolution into conjunctions and disjunctions nicely illustrates the -duality of models as product types and concepts as sum types.