Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

238 lines (211 sloc) 7.471 kb
/*!
* \file sortasort.c
*
* \brief sortasort dictionary implementation
*/
/*!
* A "sortasort" is a pre-marshalled *set* (no dups) of values intended for
* append and query operations only (no deletion). It's not a
* particularly smart data structure. Cuckoo hashing would be a
* fancier solution.
*/
/*
* It is structured as a header, followed by a fixed-length "directory"
* (an array of offsets) that point to the actual Datums
* concatenated in a variable-length array at the end of the directory.
* The initial directory entries are sorted in ascending order of the Datums
* they point to, but the last < SORTA_SLOP entries are left unsorted to facilitate
* efficient insertion. Binary Search is used on all but those last entries,
* which must be scanned. At every k*SORTA_SLOP'th insert, the full directory is
* sorted.
*/
//#include <ctype.h>
#include <postgres.h>
#include <fmgr.h>
#include "sortasort.h"
#include "sketch_support.h"
#define SORTASORT_DATA(s) (((char *)(s->dir)) + \
(s->capacity * (sizeof (s->dir[0]))))
/*
* get the ith item stored in sortasort
*/
char *sortasort_getval(sortasort *s, unsigned i) {
char *data_ptr = SORTASORT_DATA(s);
char *res = NULL;
Datum dat;
if (i >= s->num_vals) {
elog(ERROR, "attempt to get item at illegal index %d in sortasort", i);
}
if (s->dir[i] >= s->storage_sz) {
elog(ERROR, "illegal offset %u in sortasort", s->dir[i]);
}
res = data_ptr + s->dir[i];
dat = PointerExtractDatum(res, s->typByVal);
if (s->dir[i]
+ ExtractDatumLen(dat, s->typLen, s->typByVal, s->storage_sz - s->dir[i])
> s->storage_sz) {
elog(ERROR, "value overruns size of sortasort");
}
return res;
}
/*!
* given a pre-allocated sortasort, set up its metadata
* first argument s is the
* \param s a pre-allocated sortasort
* \param capacity size of the sortasort directory
* \param s_sz size of s
* \param typLen Postgres type length (-1 for bytea, -2 for cstring)
*/
sortasort *
sortasort_init(sortasort *s,
size_t capacity,
size_t s_sz,
int typLen,
bool typByVal)
{
/* capacity is the size of the directory: i.e. max number of Datums it can hold */
s->capacity = capacity;
/* storage_sz is the number of bytes available for Datums at the end. */
s->storage_sz = s_sz - sizeof(sortasort) - capacity*sizeof(s->dir[0]);
if (s_sz - sizeof(sortasort) <= capacity*sizeof(s->dir[0]))
elog(
ERROR,
"sortasort initialized too small to hold its own directory");
s->typLen = typLen;
s->typByVal = typByVal;
/* number of values so far */
s->num_vals = 0;
/* offset after the directory to do the next insertion */
s->storage_cur = 0;
return(s);
}
/*! comparison function for qsort_arg */
int sorta_cmp(const void *i, const void *j, void *thunk)
{
/* the "thunk" in this case is the sortasort being sorted */
sortasort *s = (sortasort *)thunk;
char *dat1 = sortasort_getval(s, ((unsigned *)i) - s->dir);
char *dat2 = sortasort_getval(s, ((unsigned *)j) - s->dir);
int len = s->typLen;
int shorter;
/*
* we always use typByVal = true, since we've marshalled the data into place
*/
if (len < 0) {
len = (int)ExtractDatumLen(PointerGetDatum(dat1), len, s->typByVal, -1);
if ((shorter = (len - ExtractDatumLen(PointerGetDatum(dat2), len, s->typByVal, -1))))
/* order by length */
return shorter;
/* else drop through */
}
/* byte ordering */
return memcmp(dat1, dat2, len);
}
/*!
* insert a new element into s_in if there's room and return TRUE
* if not enough room, return FALSE
* \param s_in a sortasort
* \param dat a Datum to insert
* \param len the Postgres typLen
*/
int sortasort_try_insert(sortasort *s_in, Datum dat, int len)
{
/* sanity check */
void *datp = DatumExtractPointer(dat, s_in->typByVal);
/* first check to see if the element is already there */
int found = sortasort_find(s_in, dat);
if (found >= 0 && found < (int)s_in->num_vals) {
/* found! just return TRUE */
return TRUE;
}
len = ExtractDatumLen(dat, len, s_in->typByVal, -1);
/* sanity check */
if (found < -1 || found >= (int) s_in->num_vals)
elog(ERROR,
"invalid offset %d returned by sortasort_find",
found);
/* we need to insert v. return FALSE if not enough space. */
if (s_in->storage_cur + len >= s_in->storage_sz) {
/* caller will have to allocate a bigger one and try again */
return FALSE;
}
/* return -1 if no more capacity */
if (s_in->num_vals >= s_in->capacity)
return -1;
/*
* copy dat to the current storage offset, put a pointer into dir,
* update num_vals and storage_cur
*/
memcpy(SORTASORT_DATA(s_in) + s_in->storage_cur,
datp,
len);
s_in->dir[s_in->num_vals++] = s_in->storage_cur;
s_in->storage_cur += len;
if (s_in->storage_cur > s_in->storage_sz)
elog(ERROR, "went off the end of sortasort storage");
/* re-sort every SORTA_SLOP vals */
if (s_in->num_vals % SORTA_SLOP == 0)
qsort_arg(s_in->dir,
s_in->num_vals,
sizeof(s_in->dir[0]),
sorta_cmp,
(void *)s_in);
return TRUE;
}
/*!
* find items in a sortasort. this involves binary search in the sorted prefix,
* and linear search in the <SORTA_SLOP-sized suffix.
* We assume that the sorted prefix is the
* highest multiple of SORTA_SLOP less than s->num_vals
*
* \param s a sortasort
* \param v a value to find
* \return position in directory where item found, or -1 if not found.
* NOTE: return value 0 does not mean false!! It means it *found* the item
* at position 0
*/
int sortasort_find(sortasort *s, Datum dat)
{
int theguess, diff;
int hi = (s->num_vals/SORTA_SLOP)*SORTA_SLOP;
int themin = 0, themax = hi - 1;
size_t i;
int addend, subtrahend;
size_t len = ExtractDatumLen(dat, s->typLen, s->typByVal, -1);
/* binary search on the front of the sortasort */
if (themax >= (int)s->num_vals) {
elog(ERROR,
"sortasort failure: max = %d, num_vals = %zu",
themax,
s->num_vals);
}
theguess = hi / 2;
while (themin < themax ) {
if (!(diff = memcmp(sortasort_getval(s,theguess),
DatumExtractPointer(dat, s->typByVal),
len)))
return theguess;
if (themin == themax - 1) break;
else if (diff < 0) {
/* undershot */
addend = (themax - theguess) / 2;
if (!addend) addend = 1;
themin = theguess;
theguess += addend;
}
else {
/* overshot */
subtrahend = (theguess - themin) / 2;
if (!subtrahend) subtrahend = 1;
themax = theguess;
theguess -= subtrahend;
}
}
/* if we got here, continue with a naive linear search on the tail */
for (i = hi; i < s->num_vals; i++)
if (!memcmp(sortasort_getval(s, i),
DatumExtractPointer(dat, s->typByVal),
len))
return i;
return -1;
}
Jump to Line
Something went wrong with that request. Please try again.