GROUP BY error when setting compress_segmentby with an enum column #4619

Closed · wants to merge 5 commits
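A minimal reproduction of the reported problem, condensed from the regression test this PR adds (the type, table, and column names are taken from that test). Before this fix, the final GROUP BY failed once the chunk was compressed:

CREATE TYPE an_enum_type AS ENUM ('home', 'school');
CREATE TABLE test (
  time timestamp NOT NULL,
  enum_col an_enum_type NOT NULL
);
SELECT create_hypertable('test', 'time');
INSERT INTO test VALUES ('2001-01-01 00:00', 'home'),
                        ('2001-01-01 01:00', 'school');

-- Segment the compressed chunks by the enum column.
ALTER TABLE test SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'enum_col',
    timescaledb.compress_orderby = 'time'
);
SELECT compress_chunk(c) FROM show_chunks('test') c;

-- Errored before this fix; passes with it.
SELECT 1 FROM test GROUP BY enum_col;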
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2337,6 +2337,7 @@ complete, depending on the size of your database**
 **Thanks**
 * @yadid for reporting a segfault (fixed in 50c8c4c)
 * @ryan-shaw for reporting tuples not being correctly converted to a chunk's rowtype (fixed in 645b530)
+* @yuezhihan for reporting GROUP BY error when setting compress_segmentby with an enum column
 
 ## 0.4.0 (2017-08-21)
 
@@ -2495,3 +2496,6 @@ the next release.
 * [72f754a] use PostgreSQL's own `hash_any` function as default partfunc (thanks @robin900)
 * [39f4c0f] Remove sample data instructions and point to docs site
 * [9015314] Revised the `get_general_index_definition` function to handle cases where indexes have definitions other than just `CREATE INDEX` (thanks @bricklen)
+
+**Bugfixes**
+* #4619 Improve handling enum columns in compressed hypertables
16 changes: 16 additions & 0 deletions tsl/src/nodes/decompress_chunk/decompress_chunk.c
@@ -177,6 +177,8 @@ build_compressed_scan_pathkeys(SortInfo *sort_info, PlannerInfo *root, List *chu
 	ListCell *lc;
 	char *column_name;
 	Oid sortop;
+	Oid opfamily, opcintype;
+	int16 strategy;
 
 	for (lc = list_head(chunk_pathkeys);
 		 lc != NULL && bms_num_members(segmentby_columns) < info->num_segmentby_columns;
@@ -210,6 +212,20 @@
 
 		sortop =
 			get_opfamily_member(pk->pk_opfamily, var->vartype, var->vartype, pk->pk_strategy);
+		if (!get_ordering_op_properties(sortop, &opfamily, &opcintype, &strategy))
+		{
+			if (type_is_enum(var->vartype))
+			{
+				sortop = get_opfamily_member(pk->pk_opfamily,
+											 ANYENUMOID,
+											 ANYENUMOID,
+											 pk->pk_strategy);
+			}
+			else
+			{
+				elog(ERROR, "sort operator lookup failed for column \"%s\"", column_name);
+			}
+		}
 		pk = make_pathkey_from_compressed(root,
 										  info->compressed_rel->relid,
 										  (Expr *) var,
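Why the retry uses ANYENUMOID: PostgreSQL maintains one shared btree operator family for all enum types, and its operators are registered on the polymorphic anyenum type rather than on each concrete enum. The first get_opfamily_member() call therefore finds no pg_amop entry for a concrete enum type OID, and get_ordering_op_properties() rejects the invalid result. The catalog query below (an illustrative sketch against a stock PostgreSQL catalog, not part of this patch) makes that visible:

-- All btree operators for enums live in the shared enum_ops family
-- and are keyed on anyenum, never on a concrete enum type.
SELECT amoplefttype::regtype  AS lefttype,
       amoprighttype::regtype AS righttype,
       amopopr::regoperator   AS operator,
       amopstrategy           AS strategy
FROM pg_amop
JOIN pg_opfamily f ON f.oid = amopfamily
JOIN pg_am a ON a.oid = f.opfmethod
WHERE f.opfname = 'enum_ops' AND a.amname = 'btree'
ORDER BY amopstrategy;

Each of the five btree strategies (<, <=, =, >=, >) comes back with anyenum operands, which is exactly the pair the ANYENUMOID retry looks up.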
64 changes: 64 additions & 0 deletions tsl/test/expected/compression_errors.out
@@ -545,3 +545,67 @@ NOTICE: column "medium" of relation "metric" already exists, skipping
 ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
 ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
 ERROR: column "medium_1" of relation "metric" already exists
+--github issue 3481
+--GROUP BY error when setting compress_segmentby with an enum column
+CREATE TYPE an_enum_type AS ENUM ('home', 'school');
+CREATE TABLE test (
+  time timestamp NOT NULL,
+  enum_col an_enum_type NOT NULL
+);
+SELECT create_hypertable(
+    'test', 'time'
+);
+ create_hypertable
+--------------------
+ (29,public,test,t)
+(1 row)
+
+INSERT INTO test VALUES ('2001-01-01 00:00', 'home'),
+    ('2001-01-01 01:00', 'school'),
+    ('2001-01-01 02:00', 'home');
+--enable compression on enum_col
+ALTER TABLE test SET (
+    timescaledb.compress,
+    timescaledb.compress_segmentby = 'enum_col',
+    timescaledb.compress_orderby = 'time'
+);
+--below queries will pass before chunks are compressed
+SELECT 1 FROM test GROUP BY enum_col;
+ ?column?
+----------
+        1
+        1
+(2 rows)
+
+EXPLAIN SELECT DISTINCT 1 FROM test;
+                                    QUERY PLAN
+----------------------------------------------------------------------------------
+ Unique  (cost=0.00..50.80 rows=1 width=4)
+   ->  Result  (cost=0.00..50.80 rows=2040 width=4)
+         ->  Seq Scan on _hyper_29_19_chunk  (cost=0.00..30.40 rows=2040 width=0)
+(3 rows)
+
+--compress chunks
+SELECT COMPRESS_CHUNK(X) FROM SHOW_CHUNKS('test') X;
+              compress_chunk
+------------------------------------------
+ _timescaledb_internal._hyper_29_19_chunk
+(1 row)
+
+--below query should pass after chunks are compressed
+SELECT 1 FROM test GROUP BY enum_col;
+ ?column?
+----------
+        1
+        1
+(2 rows)
+
+EXPLAIN SELECT DISTINCT 1 FROM test;
+                                              QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ Unique  (cost=0.51..21.02 rows=1 width=4)
+   ->  Result  (cost=0.51..21.02 rows=2000 width=4)
+         ->  Custom Scan (DecompressChunk) on _hyper_29_19_chunk  (cost=0.51..1.02 rows=2000 width=0)
+               ->  Seq Scan on compress_hyper_30_20_chunk  (cost=0.00..1.02 rows=2 width=4)
+(4 rows)
+
37 changes: 36 additions & 1 deletion tsl/test/sql/compression_errors.sql
@@ -312,4 +312,39 @@ ALTER TABLE metric ADD COLUMN IF NOT EXISTS "medium" VARCHAR ;
 ALTER TABLE metric ADD COLUMN IF NOT EXISTS "medium" VARCHAR ;
 -- also add one without IF NOT EXISTS
 ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
-ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
+ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
+
+--github issue 3481
+--GROUP BY error when setting compress_segmentby with an enum column
+
+CREATE TYPE an_enum_type AS ENUM ('home', 'school');
+
+CREATE TABLE test (
+  time timestamp NOT NULL,
+  enum_col an_enum_type NOT NULL
+);
+
+SELECT create_hypertable(
+    'test', 'time'
+);
+INSERT INTO test VALUES ('2001-01-01 00:00', 'home'),
+    ('2001-01-01 01:00', 'school'),
+    ('2001-01-01 02:00', 'home');
+
+--enable compression on enum_col
+ALTER TABLE test SET (
+    timescaledb.compress,
+    timescaledb.compress_segmentby = 'enum_col',
+    timescaledb.compress_orderby = 'time'
+);
+
+--below queries will pass before chunks are compressed
+SELECT 1 FROM test GROUP BY enum_col;
+EXPLAIN SELECT DISTINCT 1 FROM test;
+
+--compress chunks
+SELECT COMPRESS_CHUNK(X) FROM SHOW_CHUNKS('test') X;
+
+--below query should pass after chunks are compressed
+SELECT 1 FROM test GROUP BY enum_col;
+EXPLAIN SELECT DISTINCT 1 FROM test;