Skip to content

sharding replication

Lorenzo Mangani edited this page Aug 5, 2022 · 3 revisions

qryn: clickhouse sharding + replication

Notes kindly contributed by @coelho

Adjust retention for your desired results. This example sets it to 30 days!
// NOTE: Make sure you set SAMPLES_DAYS: 30 & LABELS_DAYS: 30 to avoid the `ALTER TABLE ...`

// NOTE: You also need to set "distributed_product_mode" to "global" in your profile.
// https://clickhouse.com/docs/en/operations/settings/settings-profiles/

CREATE TABLE qryn.samples_read (
    `fingerprint` UInt64, 
    `timestamp_ms` Int64, 
    `value` Float64, 
    `string` String
)
ENGINE = Merge('qryn', '^(samples|samples_v2)$');

////

CREATE VIEW qryn.samples_read_v2_1 (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64, 
    `value` Float64, 
    `string` String
) AS SELECT fingerprint, timestamp_ms * 1000000 AS timestamp_ns, value, string FROM qryn.samples_read;

////

CREATE TABLE qryn.samples_read_v2_2 (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64, 
    `value` Float64, 
    `string` String
)
ENGINE = Merge('qryn', '^(samples_read_v2_1|samples_v3)$');

////

CREATE TABLE qryn.samples_v3_ (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64 CODEC(DoubleDelta), 
    `value` Float64 CODEC(Gorilla), 
    `string` String
)
ENGINE = ReplicatedMergeTree('/clickhouse/{installation}/{cluster}/tables/{shard}/{uuid}', '{replica}') 
PARTITION BY toStartOfDay(toDateTime(timestamp_ns / 1000000000)) 
ORDER BY timestamp_ns TTL toDateTime(timestamp_ns / 1000000000) + toIntervalDay(3650) 
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 3600;

CREATE TABLE qryn.samples_v3 (
    `fingerprint` UInt64, 
    `timestamp_ns` Int64 CODEC(DoubleDelta), 
    `value` Float64 CODEC(Gorilla), 
    `string` String
)
ENGINE = Distributed('{cluster}', 'qryn', 'samples_v3_', fingerprint);

////

CREATE TABLE qryn.settings_ (
    `fingerprint` UInt64, 
    `type` String, 
    `name` String, 
    `value` String, 
    `inserted_at` DateTime64(9, 'UTC')
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{installation}/{cluster}/tables/{shard}/{uuid}', '{replica}', inserted_at) 
ORDER BY fingerprint 
SETTINGS index_granularity = 8192;

CREATE TABLE qryn.settings (
    `fingerprint` UInt64, 
    `type` String, 
    `name` String, 
    `value` String, 
    `inserted_at` DateTime64(9, 'UTC')
)
ENGINE = Distributed('{cluster}', 'qryn', 'settings_', fingerprint);

////

CREATE TABLE qryn.time_series_ (
    `date` Date, 
    `fingerprint` UInt64, 
    `labels` String, 
    `name` String
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{installation}/{cluster}/tables/{shard}/{uuid}', '{replica}', date) 
PARTITION BY date 
ORDER BY fingerprint TTL date + toIntervalDay(30) 
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 3600;

CREATE TABLE qryn.time_series (
    `date` Date, 
    `fingerprint` UInt64, 
    `labels` String, 
    `name` String
)
ENGINE = Distributed('{cluster}', 'qryn', 'time_series_', fingerprint);

////

CREATE TABLE qryn.time_series_gin_ (
    `date` Date, 
    `key` String, 
    `val` String, 
    `fingerprint` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{installation}/{cluster}/tables/{shard}/{uuid}', '{replica}') 
PARTITION BY date 
ORDER BY (key, val, fingerprint) TTL date + toIntervalDay(3650) 
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 3600;

CREATE TABLE qryn.time_series_gin (
    `date` Date, 
    `key` String, 
    `val` String, 
    `fingerprint` UInt64
)
ENGINE = Distributed('{cluster}', 'qryn', 'time_series_gin_', fingerprint);

////

CREATE MATERIALIZED VIEW qryn.time_series_gin_view TO qryn.time_series_gin (
    `date` Date, 
    `key` String, 
    `val` String, 
    `fingerprint` UInt64
) AS SELECT date, pairs.1 AS key, pairs.2 AS val, fingerprint FROM qryn.time_series ARRAY JOIN JSONExtractKeysAndValues(time_series.labels, 'String') AS pairs;

////

CREATE TABLE qryn.ver_ (
    `k` UInt64, 
    `ver` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{installation}/{cluster}/tables/{shard}/{uuid}', '{replica}', ver) 
ORDER BY k 
SETTINGS index_granularity = 8192;

CREATE TABLE qryn.ver (
    `k` UInt64, 
    `ver` UInt64
)
ENGINE = Distributed('{cluster}', 'qryn', 'ver_', k);

////

INSERT INTO qryn.settings (`fingerprint`, `type`, `name`, `value`, `inserted_at`) 
VALUES (990984054, 'rotate', 'v3_samples_days', '30', '2022-07-31 05:53:52.000000000')
, (4103757074, 'rotate', 'v3_time_series_days', '30', '2022-07-31 05:53:54.000000000')
, (14553193486094442270, 'update', 'v3_1', '1659246830', '2022-07-31 05:53:50.000000000');

////

INSERT INTO qryn.ver (`k`, `ver`) 
VALUES (1, 10);