Tune the nifi-kafka-druid-water-level-data demo and the stacks it runs on.

Summary of the hunks below:
  * demos-v1.yaml: drop the trailing TODO markers from the three manifest URLs.
  * create-druid-ingestion-job.yaml: additionally POST a compaction config for
    the "measurements" datasource, list "uuid" / "station_uuid" as the first
    dimension of their ingestion specs, and add the compaction spec itself
    (DAY segments, range partitioning on "station_uuid").
  * superset-assets.zip: binary asset replaced.
  * druid.yaml: historicals go from 1 to 2 replicas and get explicit
    processing-thread and segment-cache settings.
  * kafka.yaml: brokers get log.retention.bytes of 4294967296.
  * nifi.yaml: memory limit 4Gi -> 6Gi, contentRepo 5Gi -> 10Gi,
    flowfileRepo 10Gi -> 5Gi.

diff --git a/demos/demos-v1.yaml b/demos/demos-v1.yaml
index 15cfcaaa..60154dd1 100644
--- a/demos/demos-v1.yaml
+++ b/demos/demos-v1.yaml
@@ -50,9 +50,9 @@ demos:
       - s3
       - water-levels
     manifests:
-      - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/nifi-kafka-druid-water-level-data/create-nifi-ingestion-job.yaml # TODO
-      - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml # TODO
-      - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/nifi-kafka-druid-water-level-data/setup-superset.yaml # TODO
+      - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/nifi-kafka-druid-water-level-data/create-nifi-ingestion-job.yaml
+      - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml
+      - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/nifi-kafka-druid-water-level-data/setup-superset.yaml
   trino-taxi-data:
     description: Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard
     documentation: https://docs.stackable.tech/stackablectl/stable/demos/trino-taxi-data.html
diff --git a/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml b/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml
index 06bbdcc6..04484241 100644
--- a/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml
+++ b/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml
@@ -9,7 +9,7 @@ spec:
       containers:
         - name: create-druid-ingestion-job
           image: docker.stackable.tech/stackable/testing-tools:0.1.0-stackable0.1.0
-          command: ["bash", "-c", "curl -X POST -H 'Content-Type: application/json' -d @/tmp/ingestion-job-spec/stations-ingestion-job-spec.json http://druid-coordinator:8081/druid/indexer/v1/supervisor && curl -X POST -H 'Content-Type: application/json' -d @/tmp/ingestion-job-spec/measurements-ingestion-job-spec.json http://druid-coordinator:8081/druid/indexer/v1/supervisor"]
+          command: ["bash", "-c", "curl -X POST -H 'Content-Type: application/json' -d @/tmp/ingestion-job-spec/stations-ingestion-job-spec.json http://druid-coordinator:8081/druid/indexer/v1/supervisor && curl -X POST -H 'Content-Type: application/json' -d @/tmp/ingestion-job-spec/measurements-ingestion-job-spec.json http://druid-coordinator:8081/druid/indexer/v1/supervisor && curl -X POST -H 'Content-Type: application/json' -d @/tmp/ingestion-job-spec/measurements-compaction-job-spec.json http://druid-coordinator:8081/druid/coordinator/v1/config/compaction"]
           volumeMounts:
             - name: ingestion-job-spec
               mountPath: /tmp/ingestion-job-spec
@@ -65,9 +65,9 @@ data:
           },
           "dimensionsSpec": {
             "dimensions": [
+              "uuid",
               "water_longname",
               "water_shortname",
-              "uuid",
               {
                 "type": "long",
                 "name": "number"
@@ -124,11 +124,11 @@ data:
           "transformSpec": {},
           "dimensionsSpec": {
             "dimensions": [
+              "station_uuid",
               {
                 "type": "long",
                 "name": "value"
-              },
-              "station_uuid"
+              }
             ]
           },
           "granularitySpec": {
@@ -139,3 +139,20 @@ data:
         }
       }
     }
+  measurements-compaction-job-spec.json: |
+    {
+      "dataSource": "measurements",
+      "skipOffsetFromLatest": "PT1H",
+      "granularitySpec": {
+        "segmentGranularity": "DAY"
+      },
+      "tuningConfig": {
+        "partitionsSpec": {
+          "type": "range",
+          "partitionDimensions": [
+            "station_uuid"
+          ],
+          "targetRowsPerSegment": 5000000
+        }
+      }
+    }
diff --git a/demos/nifi-kafka-druid-water-level-data/superset-assets.zip b/demos/nifi-kafka-druid-water-level-data/superset-assets.zip
index be1d22c2..a0704511 100644
Binary files a/demos/nifi-kafka-druid-water-level-data/superset-assets.zip and b/demos/nifi-kafka-druid-water-level-data/superset-assets.zip differ
diff --git a/stacks/kafka-druid-superset-s3/druid.yaml b/stacks/kafka-druid-superset-s3/druid.yaml
index 2cfb577a..fef6c203 100644
--- a/stacks/kafka-druid-superset-s3/druid.yaml
+++ b/stacks/kafka-druid-superset-s3/druid.yaml
@@ -35,7 +35,12 @@ spec:
   historicals:
     roleGroups:
       default:
-        replicas: 1
+        replicas: 2
+        configOverrides:
+          runtime.properties:
+            druid.processing.numThreads: "4"  # As we are on 22.09 we can't set any resource requests or limits
+                                              # See https://github.com/stackabletech/druid-operator/issues/306
+            druid.segmentCache.locations: '[{"path":"/stackable/var/druid/segment-cache","maxSize":"8g","freeSpacePercent":"5.0"}]'
   middleManagers:
     roleGroups:
       default:
diff --git a/stacks/kafka-druid-superset-s3/kafka.yaml b/stacks/kafka-druid-superset-s3/kafka.yaml
index bbb8c269..d2278e97 100644
--- a/stacks/kafka-druid-superset-s3/kafka.yaml
+++ b/stacks/kafka-druid-superset-s3/kafka.yaml
@@ -30,3 +30,6 @@ spec:
     roleGroups:
       default:
         replicas: 1
+        configOverrides:
+          server.properties:
+            log.retention.bytes: "4294967296"  # 4Gi, as this is for every partition and the demos/users might add multiple topics
diff --git a/stacks/nifi-kafka-druid-superset-s3/nifi.yaml b/stacks/nifi-kafka-druid-superset-s3/nifi.yaml
index 04683b4c..d8617c89 100644
--- a/stacks/nifi-kafka-druid-superset-s3/nifi.yaml
+++ b/stacks/nifi-kafka-druid-superset-s3/nifi.yaml
@@ -18,17 +18,17 @@ spec:
         config:
           resources:
             memory:
-              limit: '4Gi'
+              limit: '6Gi'
             cpu:
               min: "500m"
               max: "4"
             storage:
               contentRepo:
-                capacity: "5Gi"
+                capacity: "10Gi"
               databaseRepo:
                 capacity: "5Gi"
               flowfileRepo:
-                capacity: "10Gi"
+                capacity: "5Gi"
               provenanceRepo:
                 capacity: "5Gi"
               stateRepo: