Installation

protoc-gen-druid-spec

Installation

go install github.com/nguyensinhtu/protoc-gen-druid-spec@latest

Usage

protoc --druid-spec_out=path/to/outdir foo.proto

protoc --druid-spec_out=path/to/out/dir foo.proto --proto_path=. --proto_path=<path_to_google_proto_folder>/src

Example

syntax = "proto3";

package foo;

import "druid_ingestion.proto";
import "druid_spec.proto";

import "google/protobuf/wrappers.proto";
import "google/protobuf/timestamp.proto";

message Bar {
  option (gen_druid_spec.druid_opts) = {
    data_source_name: "bar_proto3_table"
    segment_granularity: "day"
    query_granularity: "day"
  };

  string client_id = 1 [ (gen_druid_spec.spec) = {
    dimension : {
      multi_value_handling : "SORTED_SET"
      create_bitmap_index : {value: true}
    }
    metric : {
      metric_name : "client_id_sketch"
      size : 16384
      type : "thetaSketch"
    }
  } ];

  Baz baz = 2 
    [ (gen_druid_spec.spec).flatten = {prefix : "baz_" } ];;

  TimeSpec time_spec = 4[ (gen_druid_spec.spec).flatten = { prefix : "time_spec_" } ];
}

message Baz {
  int32 a = 1 [ (gen_druid_spec.spec).metric = {
    metric_name: "a_metric"
    size: 16384 
    type: "thetaSketch"
  }];
}

message TimeSpec {
  string date_key = 1[ (gen_druid_spec.spec).timestamp = {} ];
}

Output

{
 "spec": {
  "dataSchema": {
   "dataSource": "bar_proto3_table",
   "timestampSpec": {
    "column": "time_spec__date_key"
   },
   "dimensionsSpec": {
    "dimensions": [
     {
      "name": "client_id",
      "type": "string",
      "multiValueHandling": "SORTED_SET",
      "createBitmapIndex": true
     },
     {
      "name": "baz__a",
      "type": "long"
     }
    ],
    "dimensionExclusions": [
     "time_spec__date_key"
    ]
   },
   "metricsSpec": [
    {
     "name": "client_id_sketch",
     "type": "thetaSketch",
     "isInputThetaSketch": false,
     "fieldName": "client_id",
     "size": 16384
    },
    {
     "name": "a_metric",
     "type": "thetaSketch",
     "isInputThetaSketch": false,
     "fieldName": "baz__a",
     "size": 16384
    }
   ],
   "granularitySpec": {
    "type": "uniform",
    "segmentGranularity": "day",
    "queryGranularity": "day",
    "rollup": true,
    "intervals": []
   }
  },
  "ioConfig": {
   "inputFormat": {
    "type": "json",
    "flattenSpec": {
     "fields": [
      {
       "type": "jq",
       "name": "baz__a",
       "expr": ".baz.a"
      },
      {
       "type": "jq",
       "name": "time_spec__date_key",
       "expr": ".time_spec.date_key"
      }
     ],
     "useFieldDiscovery": false
    }
   }
  }
 }
}

Conceptually, after input data records are read, Druid applies ingestion spec components in a particular order: first flattenSpec (if any), then timestampSpec, then transformSpec, and finally dimensionsSpec and metricsSpec. Keep this in mind when writing your ingestion spec.

flattenSpec

A parent field name is separated from its nested field name by a double underscore ('__').

If you don't set a prefix, or you set an empty prefix, the protobuf field name is used by default.

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "foo__i1",
      "expr": ".foo.i1"
    }
  ],
  "useFieldDiscovery": false
}

If you want to remove the parent name, set ignore_name.

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {ignore_name: true} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "i1",
      "expr": ".foo.i1"
    }
  ],
  "useFieldDiscovery": false
}

If you want to use a different prefix name, set prefix.

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {prefix: "my_custom_name"} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "my_custom_name__i1",
      "expr": ".foo.i1"
    }
  ],
  "useFieldDiscovery": false
}

If you want to use a different name, set output_name.

message Bar {
   Foo foo = 4[ (gen_druid_spec.spec).flatten = {output_name: "rename_foo"} ];
}

message Foo {
  int32 i1 = 1;
}

Output

{
  "fields": [
    {
      "type": "jq",
      "name": "rename_foo__i1",
      "expr": ".rename.i1"
    }
  ],
  "useFieldDiscovery": false
}

timestampSpec

If a field is set as the timestamp, it is excluded from the dimensions (it is listed in dimensionExclusions).
Cannot apply flattenSpec, metricSpec, or dimensionSpec together with timestampSpec.
Cannot apply timestampSpec to a record type.
Cannot apply multiple timestampSpecs to one configured data source.

Query filters

TODO - Support Selector filter, Logical expression filters

IO Config

We do not recommend configuring the IOConfig for a specific schema, because one schema can be reused for multiple input sources. However, you can still configure it if you want.

message Bar {
  option (gen_druid_spec.druid_opts) = {
    data_source_name: "bar_proto3_table"
    
    io_config: {
      topic: "abc"
      bootstrap_servers: "PLAINTEXT://abc.com"
      use_earliest_offset: false
    }
    
    ingestion_type: "kafka"
  };
}

Output

{
 "spec": {
  "dataSchema": {
   "dataSource": "bar_proto3_table",
   "dimensionsSpec": {},
   "granularitySpec": {
    "type": "uniform",
    "segmentGranularity": "day",
    "queryGranularity": "none",
    "rollup": true,
    "intervals": []
   }
  },
  "ioConfig": {
   "consumerProperties": {
    "bootstrap.servers": "PLAINTEXT://abc.com"
   },
   "inputFormat": {
    "type": "json"
   },
   "topic": "abc",
   "useEarliestOffset": false
  }
 },
 "type": "kafka"
}

Name		Name	Last commit message	Last commit date
Latest commit History 30 Commits
.github/workflows		.github/workflows
examples		examples
pkg/converter		pkg/converter
protos		protos
.gitignore		.gitignore
LICENSE.md		LICENSE.md
Makefile		Makefile
README.md		README.md
druid_ingestion.proto		druid_ingestion.proto
druid_spec.proto		druid_spec.proto
go.mod		go.mod
go.sum		go.sum
main.go		main.go

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

protoc-gen-druid-spec

Installation

Usage

Example

flattenSpec

timestampSpec

Query filters

IO Config

About

Releases 3

Packages

Languages

License

nguyensinhtu/protoc-gen-druid-spec

Folders and files

Latest commit

History

Repository files navigation

protoc-gen-druid-spec

Installation

Usage

Example

flattenSpec

timestampSpec

Query filters

IO Config

About

Resources

License

Stars

Watchers

Forks

Releases 3

Packages 0

Languages

Packages