In [1]:
from functools import partial
from pathlib import Path
from pprint import pprint
from time import sleep

In [2]:
import psycopg2
from psycopg2.extras import RealDictCursor

In [3]:
from ingestion import (
    DBNAME,
    HOST,
    PASSWORD,
    PORT,
    USER,
    ingest_data_files,
    reset_db_structures,
)

## Reset DB Structures

In [4]:
# Directory holding the CSV extracts; change this single constant to relocate the data.
DATA_DIR = Path("/workspaces/data-playground/data")

# Small single-file sample.
input_csvs_small = [DATA_DIR / "user_events.small.csv"]
# Large dataset, split across two part files.
input_csvs_large = [
    DATA_DIR / "user_events.large.part01.csv",
    DATA_DIR / "user_events.large.part02.csv",
]

In [5]:
# Rebuild the database objects, then load the small sample CSV.
# Both helpers come from the project-local `ingestion` module; what they do is
# only visible here through the log lines they print below.
reset_db_structures()
ingest_data_files(input_csvs_small)

Dropping db 'db_user_events'
Creating db 'db_user_events'
Creating table 't_user_events'
Copying data from /workspaces/data-playground/data/user_events.small.csv to table 't_user_events'
Count of 't_user_events': 885_129


## Utility Functions

In [6]:
pprint = partial(pprint, sort_dicts=False, underscore_numbers=True)

In [7]:
def execute_trans_query(query: str, *, fetch_size: int = 0) -> list[dict]:
    with (
        psycopg2.connect(
            host=HOST,
            port=PORT,
            user=USER,
            password=PASSWORD,
            database=DBNAME,
            cursor_factory=RealDictCursor,
        ) as conn,
        conn.cursor() as cur,
    ):
        cur.execute(query)
        match fetch_size:
            case 0:
                return None
            case 1:
                return dict(cur.fetchone())
        return [dict(res) for res in cur.fetchmany(fetch_size)]

In [8]:
def execute_non_trans_query(query: str, *, fetch_size: int = 0) -> list[dict]:
    conn = psycopg2.connect(host=HOST, user=USER, password=PASSWORD, dbname=DBNAME)
    conn.autocommit = True
    cur = conn.cursor()
    try:
        cur.execute(query)
        match fetch_size:
            case 0:
                return None
            case 1:
                return dict(cur.fetchone())
        return [dict(res) for res in cur.fetchmany(fetch_size)]
    except Exception:
        cur.close()
        conn.close()
        raise
    finally:
        cur.close()
        conn.close()

In [9]:
def print_count(table_name="t_user_events"):
    """Print the row count of ``table_name`` using ``_`` as thousands separator.

    ``table_name`` is interpolated directly into the SQL text.
    """
    row = execute_trans_query(f"SELECT count(1) FROM {table_name}", fetch_size=1)
    total = row["count"]
    print(f"Count: {total:_}")

In [10]:
def print_current_hypertables(fetch_size=3):
    """Print rows from ``timescaledb_information.hypertables``.

    Args:
        fetch_size: Maximum number of rows to fetch, forwarded to
            ``execute_trans_query``. Defaults to 3.
    """
    query = """
        SELECT hypertable_name, num_dimensions, num_chunks, compression_enabled
        FROM timescaledb_information.hypertables;"""
    res = execute_trans_query(query, fetch_size=fetch_size)
    print("Current hypertables:")
    pprint(res)

In [11]:
def print_chunk_info(table_name="t_user_events", order_by_stmt="", fetch_size=3):
    """Print chunk metadata rows for the hypertable ``table_name``.

    Args:
        table_name: Hypertable name, interpolated directly into the SQL text.
        order_by_stmt: Optional raw ``ORDER BY ...`` clause appended to the
            query; with the default empty string no ordering is requested.
        fetch_size: Maximum number of rows to fetch, forwarded to
            ``execute_trans_query``.
    """
    chunks = execute_trans_query(
        f"""
        SELECT chunk_name, is_compressed, range_start, range_end
        FROM timescaledb_information.chunks
        WHERE hypertable_name = '{table_name}'
        {order_by_stmt};""",
        fetch_size=fetch_size,
    )
    print("Current chunks info:")
    pprint(chunks)

In [12]:
def print_compression_settings(fetch_size=10, table_name="t_user_events"):
    """Print rows from ``timescaledb_information.compression_settings`` for one hypertable.

    Args:
        fetch_size: Maximum number of rows to fetch, forwarded to
            ``execute_trans_query``.
        table_name: Hypertable name to filter on, interpolated directly into
            the SQL text. Defaults to ``"t_user_events"``.
    """
    query = f"""
        SELECT *
        FROM timescaledb_information.compression_settings
        WHERE hypertable_name = '{table_name}';"""
    res = execute_trans_query(query, fetch_size=fetch_size)
    print("Compression settings:")
    pprint(res)

## Query Statements

### Setup

In [13]:
# Look up the installed TimescaleDB extension version in the pg_extension catalog.
query = """
    SELECT extversion
    FROM pg_extension
    WHERE extname = 'timescaledb';"""
# fetch_size=1 yields a single row dict; this line raises if the extension row is missing.
version = execute_trans_query(query, fetch_size=1)["extversion"]
print(f"TimescaleDB version: {version}")

TimescaleDB version: 2.18.0


In [14]:
# Baseline before any conversion: list the hypertables currently registered.
print_current_hypertables()

Current hypertables:
[]


In [15]:
# Baseline: chunk metadata for t_user_events (the helper's default table).
print_chunk_info()

Current chunks info:
[]


In [16]:
# Convert t_user_events into a hypertable partitioned on event_time with a
# 1-day chunk interval; migrate_data and if_not_exists are both enabled.
query = """
    SELECT create_hypertable(
                't_user_events',
                'event_time',
                chunk_time_interval => INTERVAL '1 day',
                migrate_data => TRUE,
                if_not_exists => TRUE);"""
res = execute_trans_query(query, fetch_size=0)

In [17]:
# Check the hypertable listing again after the create_hypertable call.
print_current_hypertables()

Current hypertables:
[{'hypertable_name': 't_user_events',
  'num_dimensions': 1,
  'num_chunks': 158,
  'compression_enabled': False}]


In [18]:
# Show a few chunks of t_user_events (helper defaults: 3 rows, no explicit ordering).
print_chunk_info()

Current chunks info:
[{'chunk_name': '_hyper_1_1_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2020, 9, 24, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_2_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 26, 0, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_3_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2020, 9, 26, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 27, 0, 0, tzinfo=datetime.timezone.utc)}]


In [19]:
# Plan for one user's events on 2024-02-01; the 2024 sample rows are only
# inserted further down in the notebook.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789
        AND event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Result  (cost=0.00..0.00 rows=0 width=0)
  One-Time Filter: false


In [20]:
# Continuous aggregate: hourly event counts per event_type.
# Issued through execute_non_trans_query, i.e. on an autocommit connection.
query = """
    CREATE MATERIALIZED VIEW mvw_event_counts
    WITH (timescaledb.continuous) AS
    SELECT
        time_bucket('1 hour', event_time) AS bucket,
        event_type,
        COUNT(*) AS event_count
    FROM t_user_events
    GROUP BY bucket, event_type;"""
res = execute_non_trans_query(query, fetch_size=0)

In [21]:
# Attach a refresh policy to mvw_event_counts: start_offset 100 years,
# end_offset 1 hour, scheduled once per day.
query = """
    SELECT add_continuous_aggregate_policy(
                'mvw_event_counts',
                start_offset => INTERVAL '100 years',
                end_offset => INTERVAL '1 hour',
                schedule_interval => INTERVAL '1 day');"""
res = execute_trans_query(query, fetch_size=0)

In [22]:
# Peek at the six earliest rows of the continuous aggregate, ordered by bucket.
query = """
    SELECT *
    FROM mvw_event_counts
    ORDER BY bucket ASC;"""
res = execute_trans_query(query, fetch_size=6)
pprint(res)

[{'bucket': datetime.datetime(2020, 9, 24, 11, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'event_count': 13},
 {'bucket': datetime.datetime(2020, 9, 24, 12, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'purchase',
  'event_count': 18},
 {'bucket': datetime.datetime(2020, 9, 24, 12, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'event_count': 238},
 {'bucket': datetime.datetime(2020, 9, 24, 12, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'cart',
  'event_count': 14},
 {'bucket': datetime.datetime(2020, 9, 24, 13, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'purchase',
  'event_count': 10},
 {'bucket': datetime.datetime(2020, 9, 24, 13, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'event_count': 255}]


In [23]:
# Plan for reading the whole continuous aggregate in bucket order
# (only the first 10 plan lines are fetched).
query = """
    EXPLAIN
    SELECT *
    FROM mvw_event_counts
    ORDER BY bucket ASC;"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Custom Scan (ChunkAppend) on _materialized_hypertable_2  (cost=0.15..357.33 rows=5270 width=234)
  Order: _materialized_hypertable_2.bucket
  ->  Index Scan Backward using _hyper_2_175_chunk__materialized_hypertable_2_bucket_idx on _hyper_2_175_chunk  (cost=0.15..18.00 rows=310 width=234)
  ->  Index Scan Backward using _hyper_2_170_chunk__materialized_hypertable_2_bucket_idx on _hyper_2_170_chunk  (cost=0.27..21.42 rows=310 width=234)
  ->  Index Scan Backward using _hyper_2_161_chunk__materialized_hypertable_2_bucket_idx on _hyper_2_161_chunk  (cost=0.27..21.42 rows=310 width=234)
  ->  Index Scan Backward using _hyper_2_167_chunk__materialized_hypertable_2_bucket_idx on _hyper_2_167_chunk  (cost=0.27..21.42 rows=310 width=234)
  ->  Index Scan Backward using _hyper_2_171_chunk__materialized_hypertable_2_bucket_idx on _hyper_2_171_chunk  (cost=0.27..21.42 rows=310 width=234)
  ->  Index Scan Backward using _hyper_2_169_chunk__materialized_hypertable_2_bucket_idx on _hyper_2_169_chunk

In [24]:
# Plan for a one-hour bucket range on the continuous aggregate.
query = """
    EXPLAIN
    SELECT *
    FROM mvw_event_counts
    WHERE bucket >= '2020-09-24 11:00:00'
        AND bucket < '2020-09-24 12:00:00';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Bitmap Heap Scan on _hyper_2_175_chunk  (cost=1.27..3.41 rows=2 width=234)
  Recheck Cond: ((bucket >= '2020-09-24 11:00:00+00'::timestamp with time zone) AND (bucket < '2020-09-24 12:00:00+00'::timestamp with time zone))
  ->  Bitmap Index Scan on _hyper_2_175_chunk__materialized_hypertable_2_bucket_idx  (cost=0.00..1.27 rows=2 width=0)
        Index Cond: ((bucket >= '2020-09-24 11:00:00+00'::timestamp with time zone) AND (bucket < '2020-09-24 12:00:00+00'::timestamp with time zone))


In [25]:
# Change the chunk interval of t_user_events from the initial 1 day to 1 hour.
query = """
    SELECT set_chunk_time_interval('t_user_events', INTERVAL '1 hour');"""
res = execute_non_trans_query(query, fetch_size=0)
# NOTE: the new chunk interval applies only to future chunks

In [26]:
# Re-inspect existing chunks after the interval change.
print_chunk_info(fetch_size=3)

Current chunks info:
[{'chunk_name': '_hyper_1_1_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2020, 9, 24, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_2_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 26, 0, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_3_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2020, 9, 26, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 27, 0, 0, tzinfo=datetime.timezone.utc)}]


In [27]:
# Copy every existing row back into the table with event_time shifted 10 years
# into the past. The SELECT reads the whole table, so re-running this cell
# would also copy the rows it inserted previously.
query = """
    INSERT INTO t_user_events
    SELECT
        event_time - (INTERVAL '10 years'),
        event_type,
        product_id,
        category_id,
        category_code,
        brand,
        price,
        user_id,
        user_session
    FROM t_user_events;"""
res = execute_trans_query(query, fetch_size=0)

In [28]:
# List the three chunks with the earliest range_end.
print_chunk_info(
    table_name="t_user_events",
    order_by_stmt="ORDER BY range_end ASC",
    fetch_size=3,
)

Current chunks info:
[{'chunk_name': '_hyper_1_176_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2010, 9, 24, 11, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2010, 9, 24, 12, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_177_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2010, 9, 24, 12, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2010, 9, 24, 13, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_178_chunk',
  'is_compressed': False,
  'range_start': datetime.datetime(2010, 9, 24, 13, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2010, 9, 24, 14, 0, tzinfo=datetime.timezone.utc)}]


In [29]:
# Events of a single user, restricted to rows after 2015-01-01.
query = """
    SELECT
        event_time,
        event_type,
        category_code,
        user_id
    FROM t_user_events
    WHERE user_id = 1515915625519380411
    AND event_time > '2015-01-01';"""
res = execute_trans_query(query, fetch_size=3)
pprint(res)

[{'event_time': datetime.datetime(2020, 9, 24, 11, 57, 26, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'category_code': 'computers.components.cooler',
  'user_id': 1_515_915_625_519_380_411},
 {'event_time': datetime.datetime(2020, 9, 24, 12, 15, 11, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'category_code': 'computers.components.power_supply',
  'user_id': 1_515_915_625_519_380_411},
 {'event_time': datetime.datetime(2020, 9, 24, 12, 19, 57, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'category_code': 'computers.components.cooler',
  'user_id': 1_515_915_625_519_380_411}]


In [30]:
# Average price per brand over rows after 2015-01-01.
# There is no ORDER BY, so which five brands are fetched is not fixed by the query.
query = """
    SELECT
        brand,
        avg(price) AS avg_price
    FROM t_user_events
    WHERE event_time > '2015-01-01'
    GROUP BY brand;"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'brand': 'pro', 'avg_price': Decimal('33.1026666666666667')},
 {'brand': 'goip', 'avg_price': Decimal('220.6800000000000000')},
 {'brand': 'knipex', 'avg_price': Decimal('54.2400000000000000')},
 {'brand': 'zmi', 'avg_price': Decimal('61.2700000000000000')},
 {'brand': 'hyperx', 'avg_price': Decimal('90.0882915863840719')}]


In [31]:
# Five users with the most events after 2015-01-01.
query = """
    SELECT
        user_id,
        count(*) AS event_count
    FROM t_user_events
    WHERE event_time > '2015-01-01'
    GROUP BY user_id
    ORDER BY count(*) DESC;"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'user_id': 1_515_915_625_554_995_474, 'event_count': 572},
 {'user_id': 1_515_915_625_527_763_086, 'event_count': 424},
 {'user_id': 1_515_915_625_591_251_010, 'event_count': 363},
 {'user_id': 1_515_915_625_591_659_523, 'event_count': 339},
 {'user_id': 1_515_915_625_537_803_839, 'event_count': 329}]


In [32]:
# Distinct event types of one user (rows after 2015-01-01), before the UPDATE below.
query = """
    SELECT DISTINCT event_type
    FROM t_user_events
    WHERE user_id = 1515915625554995474
        AND event_time > '2015-01-01';"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'event_type': 'view'}]


In [33]:
# Relabel that user's events after 2015-01-01 as 'hover'.
query = """
    UPDATE t_user_events
    SET event_type = 'hover'
    WHERE user_id = 1515915625554995474
        AND event_time > '2015-01-01';"""
res = execute_trans_query(query, fetch_size=0)

In [34]:
# Re-read the user's distinct event types after the UPDATE.
query = """
    SELECT DISTINCT event_type
    FROM t_user_events
    WHERE user_id = 1515915625554995474
        AND event_time > '2015-01-01';"""
# NOTE(review): fetch_size=1 returns only the first row, so any additional
# distinct event types would go unnoticed; the earlier check fetched up to 5.
res = execute_trans_query(query, fetch_size=1)
pprint(res)

{'event_type': 'hover'}


In [35]:
# Count the rows dated before 2015-01-01.
query = """
    SELECT count(*)
    FROM t_user_events
    WHERE event_time < '2015-01-01';"""
res = execute_trans_query(query, fetch_size=1)
pprint(res)

{'count': 885_129}


In [36]:
# Delete the rows dated before 2015-01-01.
query = """
    DELETE FROM t_user_events
    WHERE event_time < '2015-01-01';"""
# With fetch_size=0 the helper returns None, so the pprint below just shows None.
res = execute_trans_query(query, fetch_size=0)
pprint(res)

None


In [37]:
# Re-count rows before 2015-01-01 to confirm the DELETE.
query = """
    SELECT count(*)
    FROM t_user_events
    WHERE event_time < '2015-01-01';"""
res = execute_trans_query(query, fetch_size=1)
pprint(res)

{'count': 0}


In [38]:
# Purchases in the first five hours of 2020-12-08 (no ORDER BY).
# NOTE(review): the lower bound has no time zone while the upper bound is
# explicit UTC — presumably the session time zone is UTC; confirm.
query = """
    SELECT event_time, user_session
    FROM t_user_events
    WHERE event_type = 'purchase'
        AND event_time > '2020-12-08'
        AND event_time < '2020-12-08 05:00:00 UTC';"""
res = execute_trans_query(query, fetch_size=3)
pprint(res)

[{'event_time': datetime.datetime(2020, 12, 8, 4, 38, 3, tzinfo=datetime.timezone.utc),
  'user_session': 'L6ew7YkyVP'},
 {'event_time': datetime.datetime(2020, 12, 8, 4, 37, 46, tzinfo=datetime.timezone.utc),
  'user_session': 'p9iO5LmqMB'},
 {'event_time': datetime.datetime(2020, 12, 8, 4, 34, 51, tzinfo=datetime.timezone.utc),
  'user_session': 'XUxkExWX01'}]


In [39]:
# Insert three hand-written sample rows dated 2024-02-01 (user_id 789, 790, 791).
query = """
    INSERT INTO t_user_events (
        event_time, event_type, product_id,
        category_id, category_code, brand,
        price, user_id, user_session)
    VALUES
        ('2024-02-01 12:34:56', 'purchase', 123,
            456, 'electronics.smartphone', 'BrandX',
            599.99, 789, 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'),
        ('2024-02-01 13:45:12', 'view', 124,
            457, 'electronics.tablet', 'BrandY',
            299.99, 790, 'b1eebc99-9c0b-4ef8-bb6d-6bb9bd380a12'),
        ('2024-02-01 14:56:23', 'cart', 125,
            458, 'electronics.laptop', 'BrandZ',
            999.99, 791, 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13');"""
res = execute_trans_query(query, fetch_size=0)

In [40]:
# Read back the rows dated 2024-02-01.
query = """
    SELECT *
    FROM t_user_events
    WHERE event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=3)
pprint(res)

[{'event_time': datetime.datetime(2024, 2, 1, 12, 34, 56, tzinfo=datetime.timezone.utc),
  'event_type': 'purchase',
  'product_id': 123,
  'category_id': 456,
  'category_code': 'electronics.smartphone',
  'brand': 'BrandX',
  'price': Decimal('599.99'),
  'user_id': 789,
  'user_session': 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'},
 {'event_time': datetime.datetime(2024, 2, 1, 13, 45, 12, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'product_id': 124,
  'category_id': 457,
  'category_code': 'electronics.tablet',
  'brand': 'BrandY',
  'price': Decimal('299.99'),
  'user_id': 790,
  'user_session': 'b1eebc99-9c0b-4ef8-bb6d-6bb9bd380a12'},
 {'event_time': datetime.datetime(2024, 2, 1, 14, 56, 23, tzinfo=datetime.timezone.utc),
  'event_type': 'cart',
  'product_id': 125,
  'category_id': 458,
  'category_code': 'electronics.laptop',
  'brand': 'BrandZ',
  'price': Decimal('999.99'),
  'user_id': 791,
  'user_session': 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'}]


In [41]:
# Set the price of product_id 124 to 0.99 (written as the string literal '00.99').
# The statement has no event_time predicate.
query = """
    UPDATE t_user_events
    SET price = '00.99'
    WHERE product_id = 124;"""
res = execute_trans_query(query, fetch_size=0)

In [42]:
# Single-column index on user_id.
query = """
    CREATE INDEX ix_events_user_id ON t_user_events (user_id);"""
res = execute_trans_query(query, fetch_size=0)

In [43]:
# Plan for a user_id lookup now that ix_events_user_id exists
# (only the first 10 plan lines are fetched).
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789;"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.28..9387.63 rows=4064 width=872)
  ->  Index Scan using _hyper_1_1_chunk_ix_events_user_id on _hyper_1_1_chunk  (cost=0.28..2.50 rows=1 width=72)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_2_chunk_ix_events_user_id on _hyper_1_2_chunk  (cost=0.28..2.50 rows=1 width=72)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_3_chunk_ix_events_user_id on _hyper_1_3_chunk  (cost=0.28..2.50 rows=1 width=73)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_4_chunk_ix_events_user_id on _hyper_1_4_chunk  (cost=0.28..2.50 rows=1 width=74)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_5_chunk_ix_events_user_id on _hyper_1_5_chunk  (cost=0.28..2.50 rows=1 width=73)


In [44]:
# Same user_id lookup, narrowed to the single day 2024-02-01.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789
        AND event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.07 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))


In [45]:
# Composite index: event_time descending, then user_id.
query = """
    CREATE INDEX ix_events_time_user_id ON t_user_events (event_time DESC, user_id);"""
res = execute_trans_query(query, fetch_size=0)

In [46]:
# Re-check the user_id lookup now that ix_events_time_user_id exists as well.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789;"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.28..9355.23 rows=4064 width=872)
  ->  Index Scan using _hyper_1_1_chunk_ix_events_user_id on _hyper_1_1_chunk  (cost=0.28..2.50 rows=1 width=72)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_2_chunk_ix_events_user_id on _hyper_1_2_chunk  (cost=0.28..2.50 rows=1 width=72)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_3_chunk_ix_events_user_id on _hyper_1_3_chunk  (cost=0.28..2.50 rows=1 width=73)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_4_chunk_ix_events_user_id on _hyper_1_4_chunk  (cost=0.28..2.50 rows=1 width=74)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_5_chunk_ix_events_user_id on _hyper_1_5_chunk  (cost=0.28..2.50 rows=1 width=73)


In [47]:
# Re-check the user_id + one-day lookup with both indexes in place.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789
        AND event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.07 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))


In [48]:
# NOTE(review): the reason for this fixed 60-second pause is not evident from
# the notebook — confirm it is still needed, since it slows every full re-run.
sleep(60)
# Drop the single-column user_id index created earlier.
query = """
    DROP INDEX IF EXISTS ix_events_user_id;"""
res = execute_trans_query(query, fetch_size=0)

In [49]:
# Index on (event_time DESC, event_type) built with the
# timescaledb.transaction_per_chunk option; issued via the autocommit helper.
query = """
    CREATE INDEX idx_event_time_type ON t_user_events(event_time DESC, event_type)
        WITH (timescaledb.transaction_per_chunk);"""
res = execute_non_trans_query(query, fetch_size=0)

In [50]:
# user_id lookup after ix_events_user_id was dropped and idx_event_time_type added.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789;"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.28..11265.09 rows=4064 width=872)
  ->  Index Scan using _hyper_1_1_chunk_ix_events_time_user_id on _hyper_1_1_chunk  (cost=0.28..30.48 rows=1 width=72)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_2_chunk_ix_events_time_user_id on _hyper_1_2_chunk  (cost=0.28..54.10 rows=1 width=72)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_3_chunk_ix_events_time_user_id on _hyper_1_3_chunk  (cost=0.28..44.98 rows=1 width=73)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_4_chunk_ix_events_time_user_id on _hyper_1_4_chunk  (cost=0.28..48.97 rows=1 width=74)
        Index Cond: (user_id = 789)
  ->  Index Scan using _hyper_1_5_chunk_ix_events_time_user_id on _hyper_1_5_chunk  (cost=0.28..58.75 rows=1 width=73)


In [51]:
# user_id + one-day lookup after the index changes above.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_id = 789
        AND event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.07 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND (user_id = 789))


In [52]:
# Plan for a user_session lookup before any index on user_session is created.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_session = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Gather  (cost=1000.00..21233.36 rows=3978 width=891)
  Workers Planned: 6
  ->  Parallel Append  (cost=0.00..19835.56 rows=1 width=890)
        ->  Seq Scan on _hyper_1_191_chunk  (cost=0.00..1.00 rows=1 width=932)
              Filter: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
        ->  Seq Scan on _hyper_1_211_chunk  (cost=0.00..1.00 rows=1 width=932)
              Filter: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
        ->  Seq Scan on _hyper_1_213_chunk  (cost=0.00..1.00 rows=1 width=932)
              Filter: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
        ->  Seq Scan on _hyper_1_214_chunk  (cost=0.00..1.00 rows=1 width=932)


In [53]:
# Same user_session lookup, narrowed to the single day 2024-02-01.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_session = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'
        AND event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.07 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text))


In [54]:
# Hash index on user_session.
query = """
    CREATE INDEX idx_event_session ON t_user_events USING HASH(user_session);"""
res = execute_trans_query(query, fetch_size=0)

In [55]:
# user_session lookup again, now with idx_event_session in place.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_session = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..571.80 rows=3978 width=890)
  ->  Index Scan using _hyper_1_1_chunk_idx_event_session on _hyper_1_1_chunk  (cost=0.00..2.22 rows=1 width=72)
        Index Cond: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
  ->  Index Scan using _hyper_1_2_chunk_idx_event_session on _hyper_1_2_chunk  (cost=0.00..2.22 rows=1 width=72)
        Index Cond: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
  ->  Index Scan using _hyper_1_3_chunk_idx_event_session on _hyper_1_3_chunk  (cost=0.00..2.22 rows=1 width=73)
        Index Cond: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
  ->  Index Scan using _hyper_1_4_chunk_idx_event_session on _hyper_1_4_chunk  (cost=0.00..2.22 rows=1 width=74)
        Index Cond: ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text)
  ->  Index Scan using _hyper_1_5_chunk_idx_event_session on _hyper_1_5_chunk  (cost=0.00..2.22 rows=1 width=73)


In [56]:
# user_session + one-day lookup with idx_event_session in place.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE user_session = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'
        AND event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.07 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.02 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone) AND ((user_session)::text = 'c2eebc99-9c0b-4ef8-bb6d-6bb9bd380a13'::text))


In [57]:
# Plan for a pure one-day time-range filter.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.06 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.01 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.01 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.01 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone))


In [58]:
# Single-column index on event_time.
query = """
    CREATE INDEX idx_event_time ON t_user_events (event_time);"""
res = execute_trans_query(query, fetch_size=0)

In [59]:
# Same one-day time-range plan, after idx_event_time was created.
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE event_time > '2024-02-01'
        AND event_time < '2024-02-02';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Append  (cost=0.00..3.06 rows=3 width=932)
  ->  Seq Scan on _hyper_1_3955_chunk  (cost=0.00..1.01 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone))
  ->  Seq Scan on _hyper_1_3956_chunk  (cost=0.00..1.01 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone))
  ->  Seq Scan on _hyper_1_3957_chunk  (cost=0.00..1.01 rows=1 width=932)
        Filter: ((event_time > '2024-02-01 00:00:00+00'::timestamp with time zone) AND (event_time < '2024-02-02 00:00:00+00'::timestamp with time zone))


In [60]:
# List chunks of t_user_events via show_chunks (first 5 returned rows).
query = """
    SELECT show_chunks('t_user_events');"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'show_chunks': '_timescaledb_internal._hyper_1_1_chunk'},
 {'show_chunks': '_timescaledb_internal._hyper_1_2_chunk'},
 {'show_chunks': '_timescaledb_internal._hyper_1_3_chunk'},
 {'show_chunks': '_timescaledb_internal._hyper_1_4_chunk'},
 {'show_chunks': '_timescaledb_internal._hyper_1_5_chunk'}]


In [61]:
# Dimension metadata of t_user_events from timescaledb_information.dimensions.
query = """
    SELECT *
    FROM timescaledb_information.dimensions
    WHERE hypertable_name = 't_user_events';"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'hypertable_schema': 'public',
  'hypertable_name': 't_user_events',
  'dimension_number': 1,
  'column_name': 'event_time',
  'column_type': 'timestamp with time zone',
  'dimension_type': 'Time',
  'time_interval': datetime.timedelta(seconds=3600),
  'integer_interval': None,
  'integer_now_func': None,
  'num_partitions': None}]


In [62]:
# Number of distinct category_code values.
# fetch_size=5 makes the helper return a list, so the single row comes back wrapped in one.
query = """
    SELECT COUNT(DISTINCT category_code) FROM t_user_events"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'count': 110}]


In [63]:
# Plan for filtering on category_code (only the first 10 plan lines are fetched).
query = """
    EXPLAIN
    SELECT *
    FROM t_user_events
    WHERE category_code = 'computers.peripherals.wifi';"""
res = execute_trans_query(query, fetch_size=10)
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Gather  (cost=1000.00..21929.49 rows=10880 width=372)
  Workers Planned: 6
  ->  Parallel Append  (cost=0.00..19841.49 rows=1185 width=372)
        ->  Seq Scan on _hyper_1_191_chunk  (cost=0.00..1.00 rows=1 width=932)
              Filter: ((category_code)::text = 'computers.peripherals.wifi'::text)
        ->  Seq Scan on _hyper_1_211_chunk  (cost=0.00..1.00 rows=1 width=932)
              Filter: ((category_code)::text = 'computers.peripherals.wifi'::text)
        ->  Seq Scan on _hyper_1_213_chunk  (cost=0.00..1.00 rows=1 width=932)
              Filter: ((category_code)::text = 'computers.peripherals.wifi'::text)
        ->  Seq Scan on _hyper_1_214_chunk  (cost=0.00..1.00 rows=1 width=932)


In [64]:
# Ask the planner how it would compute the hourly average price per
# category for events before 2015-01-01.
# Only the first 20 plan lines are fetched, so the printed plan is truncated.
query = """
    EXPLAIN
    SELECT time_bucket('1 hour', event_time) AS bucket,
        category_code,
        AVG(price) AS avg_price
    FROM t_user_events
    WHERE event_time < '2015-01-01'
    GROUP BY bucket, category_code;"""
res = execute_trans_query(query, fetch_size=20)
# Each fetched row is a dict; print every value on its own line.
print(*(plan_line for row in res for plan_line in row.values()), sep="\n")

Finalize HashAggregate  (cost=427385.22..427441.73 rows=3767 width=258)
  Group Key: (time_bucket('01:00:00'::interval, _hyper_1_176_chunk.event_time)), _hyper_1_176_chunk.category_code
  ->  Append  (cost=11.27..285030.29 rows=14235493 width=258)
        ->  Partial HashAggregate  (cost=11.27..67.78 rows=3767 width=258)
              Group Key: time_bucket('01:00:00'::interval, _hyper_1_176_chunk.event_time), _hyper_1_176_chunk.category_code
              ->  Seq Scan on _hyper_1_176_chunk  (cost=0.00..11.07 rows=27 width=258)
                    Filter: (event_time < '2015-01-01 00:00:00+00'::timestamp with time zone)
        ->  Partial HashAggregate  (cost=0.01..56.52 rows=3767 width=258)
              Group Key: time_bucket('01:00:00'::interval, _hyper_1_177_chunk.event_time), _hyper_1_177_chunk.category_code
              ->  Seq Scan on _hyper_1_177_chunk  (cost=0.00..0.00 rows=1 width=258)
                    Filter: (event_time < '2015-01-01 00:00:00+00'::timestamp with time z

In [65]:
# Add a second partitioning dimension to the hypertable: a hash on
# category_code split into 110 partitions.
query = """
    SELECT add_dimension(
                't_user_events',
                by_hash('category_code', 110));"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'add_dimension': '(3,t)'}]


In [66]:
# Re-read the dimension metadata for 't_user_events' so the effect of the
# add_dimension call can be checked.
query = """
    SELECT *
    FROM timescaledb_information.dimensions
    WHERE hypertable_name = 't_user_events';"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'hypertable_schema': 'public',
  'hypertable_name': 't_user_events',
  'dimension_number': 1,
  'column_name': 'event_time',
  'column_type': 'timestamp with time zone',
  'dimension_type': 'Time',
  'time_interval': datetime.timedelta(seconds=3600),
  'integer_interval': None,
  'integer_now_func': None,
  'num_partitions': None},
 {'hypertable_schema': 'public',
  'hypertable_name': 't_user_events',
  'dimension_number': 2,
  'column_name': 'category_code',
  'column_type': 'character varying',
  'dimension_type': 'Space',
  'time_interval': None,
  'integer_interval': None,
  'integer_now_func': None,
  'num_partitions': 110}]


In [67]:
# Read rows of mvw_event_counts whose bucket is after 2024-01-01, oldest
# first (at most 10 rows). The view itself is defined outside this excerpt.
query = """
    SELECT *
    FROM mvw_event_counts
    WHERE bucket > '2024-01-01'
    ORDER BY bucket ASC;"""
res = execute_trans_query(query, fetch_size=10)
pprint(res)

[]


In [68]:
# Refresh mvw_event_counts for the window 2024-01-01 .. 2024-03-31.
# Runs through execute_non_trans_query, which puts the connection in
# autocommit mode.
# NOTE(review): presumably required because this CALL cannot run inside a
# transaction block -- confirm.
query = """
    CALL refresh_continuous_aggregate(
        'mvw_event_counts',
        '2024-01-01',
        '2024-03-31');"""
res = execute_non_trans_query(query, fetch_size=0)

In [69]:
# Wait a minute, then re-run the same mvw_event_counts query as before the
# refresh to see whether rows after 2024-01-01 now appear.
# NOTE(review): the refresh is issued as a direct CALL; if that call blocks
# until the refresh completes, this wait is unnecessary -- confirm.
sleep(60)
query = """
    SELECT *
    FROM mvw_event_counts
    WHERE bucket > '2024-01-01'
    ORDER BY bucket ASC;"""
res = execute_trans_query(query, fetch_size=10)
pprint(res)

[{'bucket': datetime.datetime(2024, 2, 1, 12, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'purchase',
  'event_count': 1},
 {'bucket': datetime.datetime(2024, 2, 1, 13, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'view',
  'event_count': 1},
 {'bucket': datetime.datetime(2024, 2, 1, 14, 0, tzinfo=datetime.timezone.utc),
  'event_type': 'cart',
  'event_count': 1}]


In [70]:
# Two-step aggregation: the CTE counts events per day, then the outer query
# averages those daily counts per 7-day bucket.
# Despite the "rolling_" prefixes these are plain per-bucket aggregates, not
# sliding-window values; the column names are kept so the output keys stay
# the same.
# ORDER BY makes the five fetched rows deterministic (the earliest weeks);
# without it the database may return the groups in any order.
query = """
    WITH cte AS (
        SELECT
            time_bucket('1 day', event_time) AS day_bucket,
            count(*) AS rolling_daily_count
        FROM t_user_events
        GROUP BY day_bucket)
    SELECT
        time_bucket('7 days', day_bucket) AS week_bucket,
        avg(rolling_daily_count) AS rolling_weekly_avg
    FROM cte
    GROUP BY week_bucket
    ORDER BY week_bucket"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'week_bucket': datetime.datetime(2020, 9, 21, 0, 0, tzinfo=datetime.timezone.utc),
  'rolling_weekly_avg': Decimal('3455.5000000000000000')},
 {'week_bucket': datetime.datetime(2020, 9, 28, 0, 0, tzinfo=datetime.timezone.utc),
  'rolling_weekly_avg': Decimal('4521.5714285714285714')},
 {'week_bucket': datetime.datetime(2020, 10, 5, 0, 0, tzinfo=datetime.timezone.utc),
  'rolling_weekly_avg': Decimal('4401.4285714285714286')},
 {'week_bucket': datetime.datetime(2020, 10, 12, 0, 0, tzinfo=datetime.timezone.utc),
  'rolling_weekly_avg': Decimal('5093.5714285714285714')},
 {'week_bucket': datetime.datetime(2020, 10, 19, 0, 0, tzinfo=datetime.timezone.utc),
  'rolling_weekly_avg': Decimal('5898.0000000000000000')}]


In [71]:
# Drop mvw_event_stats if it already exists so the CREATE in the next cell
# can be re-run. Executed in autocommit mode via execute_non_trans_query.
query = """
    DROP MATERIALIZED VIEW IF EXISTS mvw_event_stats;"""
res = execute_non_trans_query(query, fetch_size=0)

In [72]:
# Create the continuous aggregate mvw_event_stats: one row per
# (month, category_code, event_type) with the event count and average price.
# Rows whose category_code is NULL or empty are excluded.
# Executed in autocommit mode via execute_non_trans_query.
query = """
    CREATE MATERIALIZED VIEW mvw_event_stats
    WITH (timescaledb.continuous) AS
    SELECT
        time_bucket('1 month', event_time) AS month_bucket,
        category_code,
        event_type,
        COUNT(*) AS event_count,
        AVG(price) AS price_avg
    FROM t_user_events
    WHERE category_code IS NOT NULL AND category_code <> ''
    GROUP BY month_bucket, category_code, event_type;"""
res = execute_non_trans_query(query, fetch_size=0)

In [73]:
# Peek at three rows of mvw_event_stats.
# There is no ORDER BY, so which three rows come back is not guaranteed.
query = """
    SELECT *
    FROM mvw_event_stats"""
res = execute_trans_query(query, fetch_size=3)
pprint(res)

[{'month_bucket': datetime.datetime(2020, 12, 1, 0, 0, tzinfo=datetime.timezone.utc),
  'category_code': 'computers.components.network_adapter',
  'event_type': 'view',
  'event_count': 275,
  'price_avg': Decimal('34.2571636363636364')},
 {'month_bucket': datetime.datetime(2020, 12, 1, 0, 0, tzinfo=datetime.timezone.utc),
  'category_code': 'computers.components.motherboard',
  'event_type': 'view',
  'event_count': 4_003,
  'price_avg': Decimal('90.1486285286035473')},
 {'month_bucket': datetime.datetime(2020, 12, 1, 0, 0, tzinfo=datetime.timezone.utc),
  'category_code': 'computers.components.cdrw',
  'event_type': 'cart',
  'event_count': 7,
  'price_avg': Decimal('80.8014285714285714')}]


In [74]:
# Distinct product count and average price per 15-minute bucket, further
# split by category_code and event_type (first five rows only).
# NOTE(review): category_code and event_type are in the GROUP BY but not in
# the SELECT list, so rows sharing a bucket cannot be told apart in the
# output. The COUNT(DISTINCT product_id) column is also unaliased and shows
# up under the default name 'count'.
query = """
    SELECT
        time_bucket('15 minutes', event_time) AS fifteen_min_bucket,
        COUNT(DISTINCT product_id),
        AVG(price) AS price_avg
    FROM t_user_events
    GROUP BY fifteen_min_bucket, category_code, event_type"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'fifteen_min_bucket': datetime.datetime(2020, 9, 24, 11, 45, tzinfo=datetime.timezone.utc),
  'count': 5,
  'price_avg': Decimal('23.5150000000000000')},
 {'fifteen_min_bucket': datetime.datetime(2020, 9, 24, 11, 45, tzinfo=datetime.timezone.utc),
  'count': 1,
  'price_avg': Decimal('17.1600000000000000')},
 {'fifteen_min_bucket': datetime.datetime(2020, 9, 24, 11, 45, tzinfo=datetime.timezone.utc),
  'count': 1,
  'price_avg': Decimal('215.4100000000000000')},
 {'fifteen_min_bucket': datetime.datetime(2020, 9, 24, 11, 45, tzinfo=datetime.timezone.utc),
  'count': 1,
  'price_avg': Decimal('53.1400000000000000')},
 {'fifteen_min_bucket': datetime.datetime(2020, 9, 24, 11, 45, tzinfo=datetime.timezone.utc),
  'count': 1,
  'price_avg': Decimal('113.8100000000000000')}]


In [75]:
# Create a small lookup table, t_product, mapping product_id to a name.
# The statement has no IF NOT EXISTS, so re-running this cell against the
# same database raises an error.
query = """
    CREATE TABLE t_product (
        product_id INT,
        product_name VARCHAR(100)
    );"""
res = execute_trans_query(query, fetch_size=0)

In [76]:
# Seed t_product with two rows (ids 123 and 124).
# The table declares no key, so re-running this cell inserts duplicates.
query = """
    INSERT INTO t_product (product_id, product_name)
    VALUES
        (123, 'Waldo'),
        (124, 'Sherry');"""
res = execute_trans_query(query, fetch_size=0)

In [77]:
# Join the hypertable with the plain t_product lookup table and count events
# per (day, product, event type). The INNER JOIN keeps only events whose
# product_id exists in t_product.
# ORDER BY makes the fetched rows deterministic; without it the database may
# return the groups in any order.
query = """
    SELECT
        time_bucket('1 day', e.event_time) AS day_bucket,
        e.product_id,
        p.product_name,
        e.event_type,
        COUNT(*) event_count
    FROM t_user_events e
    INNER JOIN t_product p ON p.product_id = e.product_id
    GROUP BY day_bucket, e.product_id, p.product_name, e.event_type
    ORDER BY day_bucket, e.product_id, e.event_type"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'day_bucket': datetime.datetime(2024, 2, 1, 0, 0, tzinfo=datetime.timezone.utc),
  'product_id': 123,
  'product_name': 'Waldo',
  'event_type': 'purchase',
  'event_count': 1},
 {'day_bucket': datetime.datetime(2024, 2, 1, 0, 0, tzinfo=datetime.timezone.utc),
  'product_id': 124,
  'product_name': 'Sherry',
  'event_type': 'view',
  'event_count': 1}]


In [78]:
# (Re)create the PL/pgSQL function calculate_day_average(start_time, end_time).
# It returns one (tstamp, revenue) row per day, where revenue is SUM(price)
# over 'purchase' events. Both bounds default to NULL; a NULL bound disables
# that side of the time filter (start is inclusive, end is exclusive).
# NOTE(review): despite its name, the function sums prices per day -- it does
# not compute an average.
query = """
    DROP FUNCTION IF EXISTS calculate_day_average;
    CREATE FUNCTION
        calculate_day_average(
        start_time TIMESTAMPTZ DEFAULT NULL,
        end_time TIMESTAMPTZ DEFAULT NULL)
    RETURNS
        TABLE (
            tstamp TIMESTAMPTZ,
            revenue NUMERIC) AS $$
    BEGIN
        RETURN QUERY
            SELECT
                time_bucket('1 day', event_time) AS day_bucket,
                sum(price) revenue
            FROM t_user_events
            WHERE event_type = 'purchase'
                AND (event_time >= start_time OR start_time IS NULL)
                AND (event_time < end_time OR end_time IS NULL)
            GROUP BY day_bucket;
    END;
    $$ LANGUAGE plpgsql;"""
res = execute_trans_query(query, fetch_size=0)

In [79]:
# Call the function with no arguments, so both time bounds are NULL and no
# time filter applies. Because the function is invoked in the SELECT list,
# each result row arrives as a single composite value under the key
# 'calculate_day_average' (first five rows only).
query = """
    SELECT calculate_day_average()"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'calculate_day_average': '("2020-09-24 00:00:00+00",9563.67)'},
 {'calculate_day_average': '("2020-09-25 00:00:00+00",13025.72)'},
 {'calculate_day_average': '("2020-09-26 00:00:00+00",11821.97)'},
 {'calculate_day_average': '("2020-09-27 00:00:00+00",7910.53)'},
 {'calculate_day_average': '("2020-09-28 00:00:00+00",15120.82)'}]


In [80]:
# Fetch the raw purchase events from 2024 onwards (at most 10 rows) so the
# function result in the following cell can be cross-checked by eye.
query = """
    SELECT event_time, price
    FROM t_user_events
    WHERE event_type = 'purchase'
        AND event_time >= '2024-01-01'"""
res = execute_trans_query(query, fetch_size=10)
# Print every column value of every row on its own line.
print(*(value for row in res for value in row.values()), sep="\n")

2024-02-01 12:34:56+00:00
599.99


In [81]:
# Call the function with only a start bound: days from 2024-01-01 onwards,
# with no upper limit because end_time keeps its NULL default.
query = """
    SELECT calculate_day_average('2024-01-01')"""
res = execute_trans_query(query, fetch_size=5)
pprint(res)

[{'calculate_day_average': '("2024-02-01 00:00:00+00",599.99)'}]


## DB-Breaking Statements

### Setup

In [82]:
# Create the plain table t_user_events_month_chunk, which later cells turn
# into a hypertable and fill from t_user_events.
# NOTE(review): the column list presumably mirrors t_user_events, since a
# later cell copies rows with INSERT ... SELECT * -- confirm.
query = """
    CREATE TABLE t_user_events_month_chunk (
        event_time TIMESTAMPTZ,
        event_type VARCHAR(100),
        product_id INT,
        category_id BIGINT,
        category_code VARCHAR(100),
        brand VARCHAR(100),
        price NUMERIC,
        user_id BIGINT,
        user_session VARCHAR(100)
    );"""
res = execute_trans_query(query, fetch_size=0)

In [83]:
# Show chunk information for the new table before it is made a hypertable
# (print_chunk_info is a helper defined outside this excerpt).
print_chunk_info(table_name="t_user_events_month_chunk")

Current chunks info:
[]


In [84]:
# Turn t_user_events_month_chunk into a hypertable partitioned on
# event_time, with a chunk interval of one month.
query = """
    SELECT create_hypertable(
                't_user_events_month_chunk',
                'event_time',
                chunk_time_interval => INTERVAL '1 month'
    );"""
res = execute_trans_query(query, fetch_size=0)

In [85]:
# Copy every row of t_user_events into the month-chunked hypertable.
# SELECT * relies on both tables having the same column order.
query = """
    INSERT INTO t_user_events_month_chunk
    SELECT * FROM t_user_events;"""
res = execute_trans_query(query, fetch_size=0)

In [86]:
# Destructive step (this is the "DB-Breaking Statements" section): drop
# every chunk of t_user_events_month_chunk older than six months.
query = """
    SELECT drop_chunks(
            't_user_events_month_chunk',
            older_than => INTERVAL '6 months');"""
res = execute_trans_query(query, fetch_size=0)

In [87]:
# Wait a minute, then print the row count of the month-chunked table to see
# what drop_chunks left behind (print_count is a helper defined outside this
# excerpt).
# NOTE(review): drop_chunks was issued as a direct SELECT; if it completes
# before returning, this wait is unnecessary -- confirm.
sleep(60)
print_count(table_name="t_user_events_month_chunk")

Count: 0


In [88]:
# Show which chunks of the month-chunked table remain after drop_chunks.
print_chunk_info(table_name="t_user_events_month_chunk")

Current chunks info:
[]


In [89]:
# Show the compression settings before compression is enabled
# (print_compression_settings is a helper defined outside this excerpt).
print_compression_settings()

Compression settings:
[]


In [90]:
# Enable compression on t_user_events: order compressed data by event_time
# descending and segment it by user_id and product_id.
query = """
    ALTER TABLE t_user_events SET (
        timescaledb.compress,
        timescaledb.compress_orderby = 'event_time DESC',
        timescaledb.compress_segmentby = 'user_id, product_id');"""
res = execute_trans_query(query, fetch_size=0)

In [91]:
# Register a compression policy on t_user_events with a 30-day interval.
query = """
    SELECT add_compression_policy(
        't_user_events',
        INTERVAL '30 days');"""
res = execute_trans_query(query, fetch_size=0)

In [92]:
# Show the compression settings again now that compression is configured.
print_compression_settings()

Compression settings:
[{'hypertable_schema': 'public',
  'hypertable_name': 't_user_events',
  'attname': 'user_id',
  'segmentby_column_index': 1,
  'orderby_column_index': None,
  'orderby_asc': None,
  'orderby_nullsfirst': None},
 {'hypertable_schema': 'public',
  'hypertable_name': 't_user_events',
  'attname': 'product_id',
  'segmentby_column_index': 2,
  'orderby_column_index': None,
  'orderby_asc': None,
  'orderby_nullsfirst': None},
 {'hypertable_schema': 'public',
  'hypertable_name': 't_user_events',
  'attname': 'event_time',
  'segmentby_column_index': None,
  'orderby_column_index': 1,
  'orderby_asc': False,
  'orderby_nullsfirst': True}]


In [93]:
# Wait a minute, then show chunk information for the first three chunks.
# NOTE(review): presumably the wait gives the compression policy's
# background job time to run; a fixed delay may be too short on a slower
# system -- confirm.
sleep(60)
print_chunk_info(fetch_size=3)

Current chunks info:
[{'chunk_name': '_hyper_1_1_chunk',
  'is_compressed': True,
  'range_start': datetime.datetime(2020, 9, 24, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_2_chunk',
  'is_compressed': True,
  'range_start': datetime.datetime(2020, 9, 25, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 26, 0, 0, tzinfo=datetime.timezone.utc)},
 {'chunk_name': '_hyper_1_3_chunk',
  'is_compressed': True,
  'range_start': datetime.datetime(2020, 9, 26, 0, 0, tzinfo=datetime.timezone.utc),
  'range_end': datetime.datetime(2020, 9, 27, 0, 0, tzinfo=datetime.timezone.utc)}]


In [94]:
# Print the row count after the compression steps (no table_name is passed,
# so the helper's default table is used).
print_count()

Count: 885_132


In [95]:
# Register a retention policy on t_user_events with a 100-year interval.
# if_not_exists => TRUE makes the call tolerate an already existing policy.
query = """
    SELECT add_retention_policy(
                't_user_events',
                INTERVAL '100 years',
                if_not_exists => TRUE);"""
res = execute_trans_query(query, fetch_size=0)

In [96]:
# Print the row count after adding the 100-year retention policy.
print_count()

Count: 885_132


In [97]:
# Remove the retention policy from t_user_events so that a different
# interval can be registered in the next cell.
query = """
    SELECT remove_retention_policy('t_user_events');"""
res = execute_trans_query(query, fetch_size=0)

In [98]:
# Destructive step: register a retention policy with a 1-hour interval on
# t_user_events. The next cell's count shows the effect on the table.
query = """
    SELECT add_retention_policy(
                't_user_events',
                INTERVAL '1 hour',
                if_not_exists => TRUE);"""
res = execute_trans_query(query, fetch_size=0)

In [99]:
# Wait a minute, then print the row count to observe the effect of the
# 1-hour retention policy.
# NOTE(review): presumably the wait gives the policy's background job time
# to run; a fixed delay may be too short on a slower system -- confirm.
sleep(60)
print_count()

Count: 0
