Skip to content

Commit

Permalink
Better accounting for number of items stored in a subspace
Browse files Browse the repository at this point in the history
We add better accounting for the number of items stored in a subspace
to allow better pruning. Instead of pruning based on the number of
dimension_slices in subsequent dimensions, we now track the total number
of items in the subspace store and prune based on that.

We add two GUC variables:
1) max_open_chunks_per_insert (default: work_mem in bytes / 512, which
assumes each entry takes 512 bytes)
2) max_cached_chunks_per_hypertable (default 100). Maximum cached chunks per
hypertable.
  • Loading branch information
cevian committed Jan 10, 2018
1 parent 12f92ea commit ad7d361
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 39 deletions.
3 changes: 2 additions & 1 deletion src/chunk_dispatch.c
Expand Up @@ -9,6 +9,7 @@
#include "chunk_insert_state.h"
#include "subspace_store.h"
#include "dimension.h"
#include "guc.h"

ChunkDispatch *
chunk_dispatch_create(Hypertable *ht, EState *estate, Query *parse)
Expand All @@ -19,7 +20,7 @@ chunk_dispatch_create(Hypertable *ht, EState *estate, Query *parse)
cd->estate = estate;
cd->hypertable_result_rel_info = NULL;
cd->parse = parse;
cd->cache = subspace_store_init(ht->space->num_dimensions, estate->es_query_cxt, 0);
cd->cache = subspace_store_init(ht->space, estate->es_query_cxt, guc_max_open_chunks_per_insert);

return cd;
}
Expand Down
9 changes: 9 additions & 0 deletions src/dimension_vector.c
Expand Up @@ -138,6 +138,15 @@ dimension_vec_find_slice_index(DimensionVec *vec, int32 dimension_slice_id)
return -1;
}

/*
 * Return the slice stored at position `index` of `vec`.
 *
 * Returns NULL when `index` does not address a stored slice, i.e. when it is
 * negative or not smaller than vec->num_slices. `vec` itself must not be
 * NULL.
 */
DimensionSlice *
dimension_vec_get(DimensionVec *vec, int32 index)
{
	/*
	 * `index` is signed, so the lower bound has to be checked as well;
	 * otherwise a negative index would read before the start of slices[].
	 */
	if (index < 0 || index >= vec->num_slices)
		return NULL;

	return vec->slices[index];
}

void
dimension_vec_free(DimensionVec *vec)
{
Expand Down
1 change: 1 addition & 0 deletions src/dimension_vector.h
Expand Up @@ -29,6 +29,7 @@ extern DimensionVec *dimension_vec_add_slice(DimensionVec **vecptr, DimensionSli
extern void dimension_vec_remove_slice(DimensionVec **vecptr, int32 index);
extern DimensionSlice *dimension_vec_find_slice(DimensionVec *vec, int64 coordinate);
extern int dimension_vec_find_slice_index(DimensionVec *vec, int32 dimension_slice_id);
extern DimensionSlice *dimension_vec_get(DimensionVec *vec, int32 index);
extern void dimension_vec_free(DimensionVec *vec);

#endif /* TIMESCALEDB_DIMENSION_VECTOR_H */
40 changes: 40 additions & 0 deletions src/guc.c
@@ -1,12 +1,23 @@
#include <postgres.h>
#include <utils/guc.h>
#include <miscadmin.h>

#include "guc.h"
#include "hypertable_cache.h"

bool guc_disable_optimizations = false;
bool guc_optimize_non_hypertables = false;
bool guc_restoring = false;
bool guc_constraint_aware_append = true;
int guc_max_open_chunks_per_insert = 10;
int guc_max_cached_chunks_per_hypertable = 10;

/*
 * Assign hook registered for timescaledb.max_cached_chunks_per_hypertable.
 *
 * Neither `newval` nor `extra` is used here; the hook only triggers an
 * invalidation of the hypertable cache.
 *
 * NOTE(review): presumably this forces hypertables to be reloaded so that
 * their chunk caches are rebuilt with the new limit -- confirm against
 * hypertable_cache_invalidate_callback().
 */
static void
assign_max_cached_chunks_per_hypertable_hook(int newval, void *extra)
{
/* invalidate the hypertable cache to reset */
hypertable_cache_invalidate_callback();
}

void
_guc_init(void)
Expand Down Expand Up @@ -51,6 +62,35 @@ _guc_init(void)
NULL,
NULL,
NULL);

DefineCustomIntVariable("timescaledb.max_open_chunks_per_insert",
"Maximum open chunks per insert",
"Maximum number of open chunk tables per insert",
&guc_max_open_chunks_per_insert,
work_mem * 1024L / 512L, /* Assume each chunk
* takes up 512 bytes
* (work_mem is in
* kbytes) */
0,
65536,
PGC_USERSET,
0,
NULL,
NULL,
NULL);

DefineCustomIntVariable("timescaledb.max_cached_chunks_per_hypertable",
"Maximum cached chunks",
"Maximum number of chunks stored in the cache",
&guc_max_cached_chunks_per_hypertable,
100,
0,
65536,
PGC_USERSET,
0,
NULL,
assign_max_cached_chunks_per_hypertable_hook,
NULL);
}

void
Expand Down
2 changes: 2 additions & 0 deletions src/guc.h
Expand Up @@ -6,6 +6,8 @@ extern bool guc_disable_optimizations;
extern bool guc_optimize_non_hypertables;
extern bool guc_constraint_aware_append;
extern bool guc_restoring;
extern int guc_max_open_chunks_per_insert;
extern int guc_max_cached_chunks_per_hypertable;

void _guc_init(void);
void _guc_fini(void);
Expand Down
3 changes: 2 additions & 1 deletion src/hypertable.c
Expand Up @@ -23,6 +23,7 @@
#include "dimension_slice.h"
#include "dimension_vector.h"
#include "hypercube.h"
#include "guc.h"

static Oid
rel_get_owner(Oid relid)
Expand Down Expand Up @@ -77,7 +78,7 @@ hypertable_from_tuple(HeapTuple tuple)
namespace_oid = get_namespace_oid(NameStr(h->fd.schema_name), false);
h->main_table_relid = get_relname_relid(NameStr(h->fd.table_name), namespace_oid);
h->space = dimension_scan(h->fd.id, h->main_table_relid, h->fd.num_dimensions);
h->chunk_cache = subspace_store_init(h->space->num_dimensions, CurrentMemoryContext, 1);
h->chunk_cache = subspace_store_init(h->space, CurrentMemoryContext, guc_max_cached_chunks_per_hypertable);

return h;
}
Expand Down
114 changes: 80 additions & 34 deletions src/subspace_store.c
Expand Up @@ -14,46 +14,80 @@
* for the N dimensions. The leaf DimensionSlice points to the data being stored.
*
* */

/*
 * One internal node of the subspace store tree. A node holds the slices of a
 * single dimension; each slice's `storage` points either to the internal
 * node of the next dimension or, on the last internal level, to the stored
 * object itself.
 */
typedef struct SubspaceStoreInternalNode
{
/* slices of this node's dimension */
DimensionVec *vector;
/*
 * number of stored items below this node, as maintained by
 * subspace_store_add(): bumped for every add that passes through the node
 * and reduced by the size of a pruned subtree
 */
size_t descendants;
/*
 * true when the slices in `vector` point directly at stored objects rather
 * than at further internal nodes (each slice then counts as one item)
 */
bool last_internal_node;
} SubspaceStoreInternalNode;

typedef struct SubspaceStore
{
MemoryContext mcxt;
int16 num_dimensions;
/* limit growth of store by limiting number of slices in first dimension, 0 for no limit */
int16 max_slices_first_dimension;
DimensionVec *origin; /* origin of the tree */
int16 max_items;
SubspaceStoreInternalNode *origin; /* origin of the tree */
} SubspaceStore;

static inline DimensionVec *
subspace_store_dimension_create()
static inline SubspaceStoreInternalNode *
subspace_store_internal_node_create(bool last_internal_node)
{
return dimension_vec_create(DIMENSION_VEC_DEFAULT_SIZE);
SubspaceStoreInternalNode *node = palloc(sizeof(SubspaceStoreInternalNode));

node->vector = dimension_vec_create(DIMENSION_VEC_DEFAULT_SIZE);
node->descendants = 0;
node->last_internal_node = last_internal_node;
return node;
}

static inline void
subspace_store_internal_node_free(void *node)
{
dimension_vec_free(((SubspaceStoreInternalNode *) node)->vector);
pfree(node);
}

/*
 * Number of stored items hanging below the slice at position `index` of
 * `node`.
 *
 * Yields 0 when there is no slice at that position, 1 when `node` is on the
 * last internal level (the slice then holds a single stored object), and
 * otherwise the descendant count recorded in the child node the slice
 * points to.
 */
static size_t
subspace_store_internal_node_descendants(SubspaceStoreInternalNode *node, int index)
{
	DimensionSlice *entry = dimension_vec_get(node->vector, index);
	SubspaceStoreInternalNode *child;

	/* nothing stored at this position */
	if (NULL == entry)
		return 0;

	if (!node->last_internal_node)
	{
		/* the slice refers to the next level's node, which keeps its own count */
		child = (SubspaceStoreInternalNode *) entry->storage;
		return child->descendants;
	}

	/* leaf level: one slice, one item */
	return 1;
}

SubspaceStore *
subspace_store_init(int16 num_dimensions, MemoryContext mcxt, int16 max_slices_first_dimension)
subspace_store_init(Hyperspace *space, MemoryContext mcxt, int16 max_items)
{
MemoryContext old = MemoryContextSwitchTo(mcxt);
SubspaceStore *sst = palloc(sizeof(SubspaceStore));

sst->origin = subspace_store_dimension_create();
sst->num_dimensions = num_dimensions;
sst->max_slices_first_dimension = max_slices_first_dimension;
/*
* make sure that the first dimension is a time dimension, otherwise the
* tree will grow in a way that makes pruning less effective.
*/
Assert(space->num_dimensions < 1 || space->dimensions[0].type == DIMENSION_TYPE_OPEN);

sst->origin = subspace_store_internal_node_create(space->num_dimensions == 1);
sst->num_dimensions = space->num_dimensions;
sst->max_items = max_items;
sst->mcxt = mcxt;
MemoryContextSwitchTo(old);
return sst;
}

static inline void
subspace_store_free_internal_node(void *node)
{
dimension_vec_free((DimensionVec *) node);
}

void
subspace_store_add(SubspaceStore *store, const Hypercube *hc,
void *object, void (*object_free) (void *))
{
DimensionVec **vecptr = &store->origin;
SubspaceStoreInternalNode *node = store->origin;
DimensionSlice *last = NULL;
MemoryContext old = MemoryContextSwitchTo(store->mcxt);
int i;
Expand All @@ -64,46 +98,58 @@ subspace_store_add(SubspaceStore *store, const Hypercube *hc,
{
const DimensionSlice *target = hc->slices[i];
DimensionSlice *match;
DimensionVec *vec = *vecptr;

Assert(target->storage == NULL);

if (vec == NULL)
if (node == NULL)
{
Assert(last != NULL);
last->storage = subspace_store_dimension_create();
last->storage_free = subspace_store_free_internal_node;
vec = last->storage;
last->storage = subspace_store_internal_node_create(i == hc->num_slices - 1);
last->storage_free = subspace_store_internal_node_free;
node = last->storage;
}

Assert(0 == vec->num_slices ||
vec->slices[0]->fd.dimension_id == target->fd.dimension_id);
node->descendants += 1;

match = dimension_vec_find_slice(vec, target->fd.range_start);
Assert(0 == node->vector->num_slices ||
node->vector->slices[0]->fd.dimension_id == target->fd.dimension_id);

match = dimension_vec_find_slice(node->vector, target->fd.range_start);

if (match == NULL)
{
DimensionSlice *copy;

if (store->max_slices_first_dimension > 0 && i == 0 && vec->num_slices >= store->max_slices_first_dimension)
if (store->max_items > 0 && i == 0 && node->descendants > store->max_items)
{
/*
* At dimension 0 only keep store->max_slices_first_dimension
* slices. This is to prevent this store from growing too
* large. Always delete the oldest.
* large. Always delete the oldest. Note that we made sure
* that the first dimension is a time dimension when creating
* the subspace_store.
*/
size_t items_removed = subspace_store_internal_node_descendants(node, 0);

Assert(store->max_items + 1 == node->descendants);

dimension_vec_remove_slice(&node->vector, 0);

/*
* Note we would have to do this to ancestors if this was not
* the root.
*/
Assert(store->max_slices_first_dimension == vec->num_slices);
dimension_vec_remove_slice(vecptr, 0);
node->descendants -= items_removed;
}
copy = dimension_slice_copy(target);

dimension_vec_add_slice_sort(vecptr, copy);
dimension_vec_add_slice_sort(&node->vector, copy);
match = copy;
}

last = match;
/* internal nodes point to the next dimension's vector */
vecptr = (DimensionVec **) &last->storage;
/* internal slices point to the next SubspaceStoreInternalNode */
node = last->storage;
}

Assert(last != NULL && last->storage == NULL);
Expand All @@ -117,7 +163,7 @@ void *
subspace_store_get(SubspaceStore *store, Point *target)
{
int i;
DimensionVec *vec = store->origin;
DimensionVec *vec = store->origin->vector;
DimensionSlice *match = NULL;

Assert(target->cardinality == store->num_dimensions);
Expand All @@ -129,7 +175,7 @@ subspace_store_get(SubspaceStore *store, Point *target)
if (NULL == match)
return NULL;

vec = match->storage;
vec = ((SubspaceStoreInternalNode *) match->storage)->vector;
}
Assert(match != NULL);
return match->storage;
Expand All @@ -138,7 +184,7 @@ subspace_store_get(SubspaceStore *store, Point *target)
void
subspace_store_free(SubspaceStore *store)
{
dimension_vec_free(store->origin);
subspace_store_internal_node_free(store->origin);
pfree(store);
}

Expand Down
3 changes: 2 additions & 1 deletion src/subspace_store.h
Expand Up @@ -2,6 +2,7 @@
#define TIMESCALEDB_SUBSPACE_STORE_H

#include <postgres.h>
#include "dimension.h"

/* A subspace store allows you to save data associated with
* a multidimensional-subspace. Subspaces are defined conceptually
Expand All @@ -13,7 +14,7 @@ typedef struct Hypercube Hypercube;
typedef struct Point Point;
typedef struct SubspaceStore SubspaceStore;

extern SubspaceStore *subspace_store_init(int16 num_dimensions, MemoryContext mcxt, int16 max_slices_first_dimension);
extern SubspaceStore *subspace_store_init(Hyperspace *space, MemoryContext mcxt, int16 max_items);

/* Store an object associate with the subspace represented by a hypercube */
extern void subspace_store_add(SubspaceStore *cache, const Hypercube *hc,
Expand Down
46 changes: 46 additions & 0 deletions test/expected/insert_single.out
Expand Up @@ -443,3 +443,49 @@ SELECT * FROM test_tz;
Thu Sep 21 19:01:00 2017 | 21.2
(3 rows)

-- test various memory settings --
SET timescaledb.max_open_chunks_per_insert = 10;
SET timescaledb.max_cached_chunks_per_hypertable = 10;
CREATE TABLE "nondefault_mem_settings"(time timestamp PRIMARY KEY, temp float);
SELECT create_hypertable('"nondefault_mem_settings"', 'time', chunk_time_interval=> INTERVAL '1 Month');
create_hypertable
-------------------

(1 row)

INSERT INTO "nondefault_mem_settings" VALUES('2000-12-01T19:00:00', 21.2);
INSERT INTO "nondefault_mem_settings" VALUES('2001-12-20T09:00:00', 25.1);
--lowest possible
SET timescaledb.max_open_chunks_per_insert = 1;
SET timescaledb.max_cached_chunks_per_hypertable = 1;
INSERT INTO "nondefault_mem_settings" VALUES
('2001-01-20T09:00:00', 26.6),
('2002-02-20T09:00:00', 27.9),
('2003-02-20T09:00:00', 28.9);
INSERT INTO "nondefault_mem_settings" VALUES
('2001-03-20T09:00:00', 30.6),
('2002-03-20T09:00:00', 31.9),
('2003-03-20T09:00:00', 32.9);
--unlimited
SET timescaledb.max_open_chunks_per_insert = 0;
SET timescaledb.max_cached_chunks_per_hypertable = 0;
INSERT INTO "nondefault_mem_settings" VALUES
('2001-04-20T09:00:00', 33.6),
('2002-04-20T09:00:00', 34.9),
('2003-04-20T09:00:00', 35.9);
SELECT * FROM "nondefault_mem_settings";
time | temp
--------------------------+------
Fri Dec 01 19:00:00 2000 | 21.2
Thu Dec 20 09:00:00 2001 | 25.1
Sat Jan 20 09:00:00 2001 | 26.6
Wed Feb 20 09:00:00 2002 | 27.9
Thu Feb 20 09:00:00 2003 | 28.9
Tue Mar 20 09:00:00 2001 | 30.6
Wed Mar 20 09:00:00 2002 | 31.9
Thu Mar 20 09:00:00 2003 | 32.9
Fri Apr 20 09:00:00 2001 | 33.6
Sat Apr 20 09:00:00 2002 | 34.9
Sun Apr 20 09:00:00 2003 | 35.9
(11 rows)

0 comments on commit ad7d361

Please sign in to comment.