/
demos-v2.yaml
228 lines (228 loc) · 10.1 KB
/
demos-v2.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
---
# Catalogue of demos, keyed by demo name under `demos`.
#
# Fields used by the entries below:
#   description         - short human-readable summary of the demo
#   documentation       - link to the demo's documentation page (not present on
#                         every entry)
#   stackableStack      - presumably the name of the stack the demo builds on;
#                         verify against the consumer
#   labels              - list of tags
#   manifests           - list of `plainYaml` URLs; may be empty (`[]`)
#   supportedNamespaces - list of namespace names; `[]` presumably means
#                         "no restriction" - TODO confirm
#   resourceRequests    - cpu / memory / pvc figures; whole-number cpu values are
#                         quoted so they stay strings instead of becoming integers
#
# Long descriptions use `>-` (folded, trailing newline stripped) so they still
# parse to a single-line string while keeping source lines short.
demos:
  # NOTE(review): no `documentation` link here, unlike most entries - confirm.
  airflow-scheduled-job:
    description: Activate a simple Airflow DAG to run continuously at a set interval
    stackableStack: airflow
    labels:
      - airflow
      - job-scheduling
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/01-airflow-spark-clusterrole.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/02-airflow-spark-clusterrolebinding.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: 2401m
      memory: 9010Mi
      pvc: 24Gi
  # NOTE(review): no `documentation` link here, unlike most entries - confirm.
  hbase-hdfs-load-cycling-data:
    description: Copy data from S3 bucket to an HBase table
    stackableStack: hdfs-hbase
    labels:
      - hbase
      - hdfs
      - cycling-tripdata
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/hbase-hdfs-load-cycling-data/create-hfile-and-import-to-hbase.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: "3"
      memory: 5638Mi
      pvc: 16Gi
  # NOTE(review): no `documentation` link here, unlike most entries - confirm.
  end-to-end-security:
    description: Demonstrates end-to-end security across multiple products
    stackableStack: end-to-end-security
    labels:
      - security
      - hdfs
      - hive
      - trino
      - superset
      - opa
      - keycloak
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/end-to-end-security/create-trino-tables.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/end-to-end-security/serviceaccount.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/end-to-end-security/create-spark-report.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: 6250m
      memory: 19586Mi
      pvc: 40Gi
  nifi-kafka-druid-earthquake-data:
    description: >-
      Demo ingesting earthquake data into Kafka using NiFi, streaming it into
      Druid and creating a Superset dashboard
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/nifi-kafka-druid-earthquake-data.html
    stackableStack: nifi-kafka-druid-superset-s3
    labels:
      - nifi
      - kafka
      - druid
      - superset
      - minio
      - s3
      - earthquakes
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/nifi-kafka-druid-earthquake-data/create-nifi-ingestion-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/nifi-kafka-druid-earthquake-data/create-druid-ingestion-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/nifi-kafka-druid-earthquake-data/setup-superset.yaml
    supportedNamespaces: ["default"]
    resourceRequests:
      cpu: 8700m
      memory: 29746Mi
      pvc: 75Gi  # 30Gi for Kafka
  nifi-kafka-druid-water-level-data:
    description: >-
      Demo ingesting water level data into Kafka using NiFi, streaming it into
      Druid and creating a Superset dashboard
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/nifi-kafka-druid-water-level-data.html
    stackableStack: nifi-kafka-druid-superset-s3
    labels:
      - nifi
      - kafka
      - druid
      - superset
      - minio
      - s3
      - water-levels
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/nifi-kafka-druid-water-level-data/create-nifi-ingestion-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/nifi-kafka-druid-water-level-data/create-druid-ingestion-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/nifi-kafka-druid-water-level-data/setup-superset.yaml
    supportedNamespaces: ["default"]
    resourceRequests:
      cpu: 8900m
      memory: 30042Mi
      pvc: 75Gi  # 30Gi for Kafka
  spark-k8s-anomaly-detection-taxi-data:
    description: >-
      Demo loading New York taxi data into an S3 bucket and carrying out an
      anomaly detection analysis on it
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/spark-k8s-anomaly-detection-taxi-data.html
    stackableStack: spark-trino-superset-s3
    # NOTE(review): there is no `spark` label although the demo and stack names
    # mention Spark - confirm whether that is intentional.
    labels:
      - trino
      - superset
      - minio
      - s3
      - ny-taxi-data
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/spark-k8s-anomaly-detection-taxi-data/serviceaccount.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/spark-k8s-anomaly-detection-taxi-data/load-test-data.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/spark-k8s-anomaly-detection-taxi-data/setup-superset.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: 6400m
      memory: 12622Mi
      pvc: 35Gi
  trino-iceberg:
    description: Demo containing Trino using Apache Iceberg as a S3 data lakehouse
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/trino-iceberg.html
    stackableStack: trino-iceberg
    labels:
      - trino
      - iceberg
      - minio
      - s3
    manifests: []
    supportedNamespaces: []
    resourceRequests:
      cpu: 8550m
      memory: 26662Mi
      pvc: 110Gi  # 100Gi for MinIO
  trino-taxi-data:
    description: >-
      Demo loading 2.5 years of New York taxi data into S3 bucket, creating a
      Trino table and a Superset dashboard
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/trino-taxi-data.html
    stackableStack: trino-superset-s3
    labels:
      - trino
      - superset
      - minio
      - s3
      - ny-taxi-data
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-taxi-data/load-test-data.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-taxi-data/create-table-in-trino.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-taxi-data/setup-superset.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: 6800m
      memory: 15822Mi
      pvc: 28Gi
  data-lakehouse-iceberg-trino-spark:
    description: >-
      Data lakehouse using Iceberg lakehouse on S3, Trino as query engine, Spark
      for streaming ingest and Superset for data visualization. Multiple
      datasources like taxi data, water levels in Germany, earthquakes,
      e-charging stations and more are loaded.
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/data-lakehouse-iceberg-trino-spark.html
    stackableStack: data-lakehouse-iceberg-trino-spark
    labels:
      - iceberg
      - trino
      - spark
      - superset
      - kafka
      - nifi
      - minio
      - s3
      - ny-taxi-data
      - water-levels
      - earthquakes
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/serviceaccount.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/load-test-data.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/create-trino-tables.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/setup-superset.yaml
    supportedNamespaces: ["default"]
    resourceRequests:
      cpu: "80"
      memory: 200Gi
      pvc: 1Ti
  jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data:
    description: Jupyterhub with PySpark and HDFS integration
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.html
    stackableStack: jupyterhub-pyspark-hdfs
    labels:
      - jupyterhub
      - hdfs
      - pyspark
      - ny-taxi-data
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/load-test-data.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: 3350m
      memory: 5098Mi
      pvc: 22Gi
  logging:
    description: Demo showing the logging stack in action
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/logging.html
    stackableStack: logging
    labels:
      - logging
      - opensearch
      - opensearch-dashboards
      - vector
      - zookeeper
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/logging/zookeeper.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: 6500m
      memory: 5098Mi
      pvc: 27Gi
  signal-processing:
    description: Demo showing signal processing on time-series data
    documentation: https://docs.stackable.tech/stackablectl/stable/demos/signal-processing.html
    stackableStack: signal-processing
    labels:
      - nifi
      - jupyterhub
      - grafana-dashboards
      - zookeeper
    manifests:
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/signal-processing/serviceaccount.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/signal-processing/create-timescale-tables.yaml
      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/signal-processing/create-nifi-ingestion-job.yaml
    supportedNamespaces: []
    resourceRequests:
      cpu: "3"
      memory: 5098Mi
      pvc: 16Gi