Skip to content

Commit

Permalink
New parser (#682)
Browse files Browse the repository at this point in the history
Add a new faster parser.
  • Loading branch information
ohler55 committed Aug 8, 2021
1 parent 4fdd25d commit 37fce4b
Show file tree
Hide file tree
Showing 35 changed files with 4,766 additions and 366 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Expand Up @@ -50,7 +50,7 @@ BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/CI.yml
Expand Up @@ -31,11 +31,10 @@ jobs:
exclude:
- os: macos
ruby: head
- os: macos
ruby: '3.0'
- os: macos
ruby: '2.5'
- ruby: '3.0'
gemfile: rails_5
- ruby: '3.0'
gemfile: rails_6

env:
BUNDLE_GEMFILE: gemfiles/${{ matrix.gemfile }}.gemfile
Expand Down
9 changes: 9 additions & 0 deletions ext/oj/buf.h
Expand Up @@ -19,6 +19,10 @@ inline static void buf_init(Buf buf) {
buf->tail = buf->head;
}

inline static void buf_reset(Buf buf) {
buf->tail = buf->head;
}

inline static void buf_cleanup(Buf buf) {
if (buf->base != buf->head) {
xfree(buf->head);
Expand All @@ -29,6 +33,11 @@ inline static size_t buf_len(Buf buf) {
return buf->tail - buf->head;
}

inline static const char *buf_str(Buf buf) {
*buf->tail = '\0';
return buf->head;
}

inline static void buf_append_string(Buf buf, const char *s, size_t slen) {
if (buf->end <= buf->tail + slen) {
size_t len = buf->end - buf->head;
Expand Down
187 changes: 187 additions & 0 deletions ext/oj/cache.c
@@ -0,0 +1,187 @@
// Copyright (c) 2011, 2021 Peter Ohler. All rights reserved.
// Licensed under the MIT License. See LICENSE file in the project root for license details.

#include "cache.h"

#define REHASH_LIMIT 64
#define MIN_SHIFT 8

typedef struct _slot {
struct _slot *next;
VALUE val;
uint32_t hash;
uint8_t klen;
char key[CACHE_MAX_KEY];
} * Slot;

typedef struct _cache {
Slot * slots;
size_t cnt;
VALUE (*form)(const char *str, size_t len);
uint32_t size;
uint32_t mask;
bool mark;
} * Cache;

// almost the Murmur hash algorithm
#define M 0x5bd1e995
#define C1 0xCC9E2D51
#define C2 0x1B873593
#define N 0xE6546B64

void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
c->form = form;
}

#if 0
// For debugging only.
static void cache_print(Cache c) {
for (uint32_t i = 0; i < c->size; i++) {
printf("%4d:", i);
for (Slot s = c->slots[i]; NULL != s; s = s->next) {
char buf[40];
strncpy(buf, s->key, s->klen);
buf[s->klen] = '\0';
printf(" %s", buf);
}
printf("\n");
}
}
#endif

static uint32_t hash_calc(const uint8_t *key, size_t len) {
const uint8_t *end = key + len;
const uint8_t *endless = key + (len & 0xFFFFFFFC);
uint32_t h = (uint32_t)len;
uint32_t k;

while (key < endless) {
k = (uint32_t)*key++;
k |= (uint32_t)*key++ << 8;
k |= (uint32_t)*key++ << 16;
k |= (uint32_t)*key++ << 24;

k *= M;
k ^= k >> 24;
h *= M;
h ^= k * M;
}
if (1 < end - key) {
uint16_t k16 = (uint16_t)*key++;

k16 |= (uint16_t)*key++ << 8;
h ^= k16 << 8;
}
if (key < end) {
h ^= *key;
}
h *= M;
h ^= h >> 13;
h *= M;
h ^= h >> 15;

return h;
}

Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark) {
Cache c = ALLOC(struct _cache);
int shift = 0;

for (; REHASH_LIMIT < size; size /= 2, shift++) {
}
if (shift < MIN_SHIFT) {
shift = MIN_SHIFT;
}
c->size = 1 << shift;
c->mask = c->size - 1;
c->slots = ALLOC_N(Slot, c->size);
memset(c->slots, 0, sizeof(Slot) * c->size);
c->form = form;
c->cnt = 0;
c->mark = mark;

return c;
}

static void rehash(Cache c) {
uint32_t osize = c->size;

c->size = osize * 4;
c->mask = c->size - 1;
REALLOC_N(c->slots, Slot, c->size);
memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);

Slot *end = c->slots + osize;
for (Slot *sp = c->slots; sp < end; sp++) {
Slot s = *sp;
Slot next = NULL;

*sp = NULL;
for (; NULL != s; s = next) {
next = s->next;

uint32_t h = s->hash & c->mask;
Slot * bucket = c->slots + h;

s->next = *bucket;
*bucket = s;
}
}
}

void cache_free(Cache c) {
for (uint32_t i = 0; i < c->size; i++) {
Slot next;
for (Slot s = c->slots[i]; NULL != s; s = next) {
next = s->next;
xfree(s);
}
}
xfree(c->slots);
xfree(c);
}

void cache_mark(Cache c) {
if (c->mark) {
for (uint32_t i = 0; i < c->size; i++) {
for (Slot s = c->slots[i]; NULL != s; s = s->next) {
rb_gc_mark(s->val);
}
}
}
}

VALUE
cache_intern(Cache c, const char *key, size_t len) {
if (CACHE_MAX_KEY < len) {
return c->form(key, len);
}
uint32_t h = hash_calc((const uint8_t *)key, len);
Slot * bucket = c->slots + (h & c->mask);
Slot b;
Slot tail = NULL;

for (b = *bucket; NULL != b; b = b->next) {
if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
return b->val;
}
tail = b;
}
b = ALLOC(struct _slot);
b->hash = h;
b->next = NULL;
memcpy(b->key, key, len);
b->klen = (uint8_t)len;
b->key[len] = '\0';
b->val = c->form(key, len);
if (NULL == tail) {
*bucket = b;
} else {
tail->next = b;
}
c->cnt++;
if (REHASH_LIMIT < c->cnt / c->size) {
rehash(c);
}
return b->val;
}
20 changes: 20 additions & 0 deletions ext/oj/cache.h
@@ -0,0 +1,20 @@
// Copyright (c) 2021 Peter Ohler. All rights reserved.
// Licensed under the MIT License. See LICENSE file in the project root for license details.

#ifndef CACHE_H
#define CACHE_H

#include <ruby.h>
#include <stdbool.h>

#define CACHE_MAX_KEY 35

struct _cache;

extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
extern void cache_free(struct _cache *c);
extern void cache_mark(struct _cache *c);
extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);

#endif /* CACHE_H */
21 changes: 4 additions & 17 deletions ext/oj/compat.c
Expand Up @@ -5,7 +5,7 @@

#include "encode.h"
#include "err.h"
#include "hash.h"
#include "intern.h"
#include "oj.h"
#include "parse.h"
#include "resolve.h"
Expand Down Expand Up @@ -33,23 +33,10 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
}
} else if (Yes == pi->options.sym_key) {
rkey = oj_sym_intern(key, klen);
} else {
VALUE *slot;

if (Yes == pi->options.sym_key) {
if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) {
rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
*slot = rkey;
rb_gc_register_address(slot);
}
} else {
if (Qnil == (rkey = oj_str_hash_get(key, klen, &slot))) {
rkey = rb_str_new(key, klen);
rkey = oj_encode(rkey);
*slot = rkey;
rb_gc_register_address(slot);
}
}
rkey = oj_str_intern(key, klen);
}
}
if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
Expand Down
2 changes: 1 addition & 1 deletion ext/oj/custom.c
Expand Up @@ -8,7 +8,7 @@
#include "dump.h"
#include "encode.h"
#include "err.h"
#include "hash.h"
#include "intern.h"
#include "odd.h"
#include "oj.h"
#include "parse.h"
Expand Down

0 comments on commit 37fce4b

Please sign in to comment.