diff --git a/c/Makefile b/c/Makefile new file mode 100644 index 0000000000..ca20397f5b --- /dev/null +++ b/c/Makefile @@ -0,0 +1,21 @@ +CC=gcc -Wall -std=gnu99 -ggdb + +.PHONY: all clean +all: parsertest + +clean: + make -Bnd | grep 'Must remake target' | \ + sed 's/.*`\(.*\)'\''.*/\1/' | grep -v '^all$$' | \ + xargs rm + + +lexer.yy.c: lexer.l + flex -o lexer.yy.c --header-file=lexer.yy.h lexer.l +lexer.yy.h: lexer.yy.c + +parser.tab.c: parser.y lexer.yy.h + bison -W -d parser.y +parser.tab.h: parser.tab.c + +parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c + $(CC) -o $@ $^ -ljansson diff --git a/c/builtin.c b/c/builtin.c new file mode 100644 index 0000000000..eba81994eb --- /dev/null +++ b/c/builtin.c @@ -0,0 +1,18 @@ +#include "builtin.h" + +#include + + +void f_false(json_t* input[], json_t* output[]) { + output[0] = json_false(); +} + +void f_true(json_t* input[], json_t* output[]) { + output[0] = json_true(); +} + +struct cfunction function_list[] = { + {f_true, "true", CALL_BUILTIN_1_1}, + {f_false, "false", CALL_BUILTIN_1_1}, +}; +struct symbol_table builtins = {function_list, sizeof(function_list)/sizeof(function_list[0])}; diff --git a/c/builtin.h b/c/builtin.h new file mode 100644 index 0000000000..5b0d3702a2 --- /dev/null +++ b/c/builtin.h @@ -0,0 +1,8 @@ +#ifndef BUILTIN_H +#define BUILTIN_H + +#include "bytecode.h" + +extern struct symbol_table builtins; + +#endif diff --git a/c/bytecode.c b/c/bytecode.c new file mode 100644 index 0000000000..477d3aa474 --- /dev/null +++ b/c/bytecode.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include "bytecode.h" +#include "opcode.h" + +void dump_disassembly(struct bytecode* bc) { + int pc = 0; + while (pc < bc->codelen) { + dump_operation(bc, bc->code + pc); + printf("\n"); + pc += opcode_length(bc->code[pc]); + } +} + +void dump_operation(struct bytecode* bc, uint16_t* codeptr) { + int pc = codeptr - bc->code; + printf("%04d ", pc); + const struct opcode_description* op = opcode_describe(bc->code[pc++]); + printf("%s", op->name); + if (op->flags & OP_HAS_IMMEDIATE) { + uint16_t imm = bc->code[pc++]; + printf(" "); + if (op->flags & OP_HAS_BRANCH) { + printf("%04d", pc + imm); + } else if (op->flags & OP_HAS_CONSTANT) { + json_dumpf(json_array_get(bc->constants, imm), + stdout, JSON_ENCODE_ANY); + } else if (op->flags & OP_HAS_VARIABLE) { + printf("v%d", imm); + } else { + printf("%d", imm); + } + } +} diff --git a/c/bytecode.h b/c/bytecode.h new file mode 100644 index 0000000000..ddf1105a1b --- /dev/null +++ b/c/bytecode.h @@ -0,0 +1,33 @@ +#ifndef BYTECODE_H +#define BYTECODE_H +#include +#include + +#include "opcode.h" + +typedef void (*cfunction_ptr)(json_t* input[], json_t* output[]); + +struct cfunction { + cfunction_ptr fptr; + const char* name; + opcode callop; +}; + +#define MAX_CFUNCTION_ARGS 10 +struct symbol_table { + struct cfunction* cfunctions; + int ncfunctions; +}; + + +struct bytecode { + uint16_t* code; + int codelen; + int framesize; + json_t* constants; + struct symbol_table* globals; +}; + +void dump_disassembly(struct bytecode* code); +void dump_operation(struct bytecode* bc, uint16_t* op); +#endif diff --git a/c/compile.c b/c/compile.c new file mode 100644 index 0000000000..cc3445754e --- /dev/null +++ b/c/compile.c @@ -0,0 +1,288 @@ +#include +#include +#include "opcode.h" +#include "compile.h" + + +struct inst { + struct inst* next; + struct inst* prev; + + opcode op; + + union { + uint16_t intval; + struct inst* target; + json_t* constant; + char* symbol; + } imm; + + struct inst* var_binding; + int var_frame_idx; + + int bytecode_pos; // position just after this insn +}; + +static inst* inst_new(opcode op) { + inst* i = malloc(sizeof(inst)); + i->next = i->prev = 0; + i->op = op; + i->bytecode_pos = -1; + i->var_binding = 0; + i->var_frame_idx = 0; + return i; +} + +static void inst_free(struct inst* i) { + if (opcode_describe(i->op)->flags & + (OP_HAS_SYMBOL | OP_HAS_VARIABLE)) { + free(i->imm.symbol); + } + free(i); +} + +static block inst_block(inst* i) { + block b = {i,i}; + return b; +} + +block gen_noop() { + block b = {0,0}; + return b; +} + +block gen_op_simple(opcode op) { + assert(!(opcode_describe(op)->flags & OP_HAS_IMMEDIATE)); + return inst_block(inst_new(op)); +} + + +block gen_op_const(opcode op, json_t* constant) { + assert(opcode_describe(op)->flags & OP_HAS_CONSTANT); + inst* i = inst_new(op); + i->imm.constant = constant; + return inst_block(i); +} + +block gen_op_target(opcode op, block target) { + assert(opcode_describe(op)->flags & OP_HAS_BRANCH); + assert(target.last); + inst* i = inst_new(op); + i->imm.target = target.last; + return inst_block(i); +} + +block gen_op_targetlater(opcode op) { + assert(opcode_describe(op)->flags & OP_HAS_BRANCH); + inst* i = inst_new(op); + i->imm.target = 0; + return inst_block(i); +} +void inst_set_target(block b, block target) { + assert(b.first); + assert(b.first == b.last); + assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH); + assert(target.last); + b.first->imm.target = target.last; +} + +block gen_op_var_unbound(opcode op, const char* name) { + assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); + inst* i = inst_new(op); + i->imm.symbol = strdup(name); + return inst_block(i); +} + +block gen_op_var_bound(opcode op, block binder) { + assert(binder.first); + assert(binder.first == binder.last); + block b = gen_op_var_unbound(op, binder.first->imm.symbol); + b.first->var_binding = binder.first; + return b; +} + +block gen_op_symbol(opcode op, const char* sym) { + assert(opcode_describe(op)->flags & OP_HAS_SYMBOL); + inst* i = inst_new(op); + i->imm.symbol = strdup(sym); + return inst_block(i); +} + +static void inst_join(inst* a, inst* b) { + assert(a && b); + assert(!a->next); + assert(!b->prev); + a->next = b; + b->prev = a; +} + +void block_append(block* b, block b2) { + if (b2.first) { + if (b->last) { + inst_join(b->last, b2.first); + } else { + b->first = b2.first; + } + b->last = b2.last; + } +} + +block block_join(block a, block b) { + block c = a; + block_append(&c, b); + return c; +} + +block block_bind(block binder, block body) { + assert(binder.first); + assert(binder.first == binder.last); + assert(opcode_describe(binder.first->op)->flags & OP_HAS_VARIABLE); + assert(binder.first->imm.symbol); + assert(binder.first->var_binding == 0); + + binder.first->var_binding = binder.first; + for (inst* i = body.first; i; i = i->next) { + if (opcode_describe(i->op)->flags & OP_HAS_VARIABLE && + i->var_binding == 0 && + !strcmp(i->imm.symbol, binder.first->imm.symbol)) { + // bind this variable + i->var_binding = binder.first; + } + } + return block_join(binder, body); +} + + +block gen_subexp(block a) { + block c = gen_noop(); + block_append(&c, gen_op_simple(DUP)); + block_append(&c, a); + block_append(&c, gen_op_simple(SWAP)); + return c; +} + +block gen_both(block a, block b) { + block c = gen_noop(); + block jump = gen_op_targetlater(JUMP); + block fork = gen_op_targetlater(FORK); + block_append(&c, fork); + block_append(&c, a); + block_append(&c, jump); + inst_set_target(fork, c); + block_append(&c, b); + inst_set_target(jump, c); + return c; +} + + +block gen_collect(block expr) { + block c = gen_noop(); + block_append(&c, gen_op_simple(DUP)); + block_append(&c, gen_op_const(LOADK, json_array())); + block array_var = block_bind(gen_op_var_unbound(STOREV, "collect"), + gen_noop()); + block_append(&c, array_var); + + block tail = {0}; + block_append(&tail, gen_op_simple(DUP)); + block_append(&tail, gen_op_var_bound(LOADV, array_var)); + block_append(&tail, gen_op_simple(SWAP)); + block_append(&tail, gen_op_simple(APPEND)); + block_append(&tail, gen_op_var_bound(STOREV, array_var)); + block_append(&tail, gen_op_simple(BACKTRACK)); + + block_append(&c, gen_op_target(FORK, tail)); + block_append(&c, expr); + block_append(&c, tail); + + block_append(&c, gen_op_var_bound(LOADV, array_var)); + + return c; +} + +block gen_else(block a, block b) { + assert(0); +} + + +struct bytecode* block_compile(struct symbol_table* syms, block b) { + inst* curr = b.first; + int pos = 0; + int var_frame_idx = 0; + for (; curr; curr = curr->next) { + if (!curr->next) assert(curr == b.last); + pos += opcode_length(curr->op); + curr->bytecode_pos = pos; + if (opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) { + assert(curr->var_binding && "unbound variable"); + if (curr->var_binding == curr) { + curr->var_frame_idx = var_frame_idx++; + } + } + } + struct bytecode* bc = malloc(sizeof(struct bytecode)); + bc->codelen = pos; + uint16_t* code = malloc(sizeof(uint16_t) * bc->codelen); + bc->code = code; + int* stack_height = malloc(sizeof(int) * (bc->codelen + 1)); + for (int i = 0; icodelen + 1; i++) stack_height[i] = -1; + pos = 0; + json_t* constant_pool = json_array(); + int maxvar = -1; + int curr_stack_height = 1; + for (curr = b.first; curr; curr = curr->next) { + const struct opcode_description* op = opcode_describe(curr->op); + if (curr_stack_height < op->stack_in) { + printf("Stack underflow at %04d\n", curr->bytecode_pos); + } + if (stack_height[curr->bytecode_pos] != -1 && + stack_height[curr->bytecode_pos] != curr_stack_height) { + // FIXME: not sure this is right at all :( + printf("Inconsistent stack heights at %04d %s\n", curr->bytecode_pos, op->name); + } + curr_stack_height -= op->stack_in; + curr_stack_height += op->stack_out; + code[pos++] = curr->op; + int opflags = op->flags; + if (opflags & OP_HAS_CONSTANT) { + code[pos++] = json_array_size(constant_pool); + json_array_append(constant_pool, curr->imm.constant); + } else if (opflags & OP_HAS_VARIABLE) { + uint16_t var = (uint16_t)curr->var_binding->var_frame_idx; + code[pos++] = var; + if (var > maxvar) maxvar = var; + } else if (opflags & OP_HAS_BRANCH) { + assert(curr->imm.target->bytecode_pos != -1); + assert(curr->imm.target->bytecode_pos > pos); // only forward branches + code[pos] = curr->imm.target->bytecode_pos - (pos + 1); + stack_height[curr->imm.target->bytecode_pos] = curr_stack_height; + pos++; + } else if (opflags & OP_HAS_CFUNC) { + assert(curr->imm.symbol); + int found = 0; + for (int i=0; incfunctions; i++) { + if (!strcmp(curr->imm.symbol, syms->cfunctions[i].name)) { + code[pos++] = i; + found = 1; + break; + } + } + assert(found); + } else if (opflags & OP_HAS_IMMEDIATE) { + code[pos++] = curr->imm.intval; + } + } + free(stack_height); + bc->constants = constant_pool; + bc->framesize = maxvar + 2; // FIXME: frames of size zero? + bc->globals = syms; + return bc; +} + +void block_free(block b) { + struct inst* next; + for (struct inst* curr = b.first; curr; curr = next) { + next = curr->next; + inst_free(curr); + } +} diff --git a/c/compile.h b/c/compile.h new file mode 100644 index 0000000000..0ba54f15d5 --- /dev/null +++ b/c/compile.h @@ -0,0 +1,34 @@ +#include +#include "bytecode.h" +#include "opcode.h" + +struct inst; +typedef struct inst inst; + + +typedef struct block { + inst* first; + inst* last; +} block; + +block gen_noop(); +block gen_op_simple(opcode op); +block gen_op_const(opcode op, json_t* constant); +block gen_op_target(opcode op, block target); +block gen_op_var_unbound(opcode op, const char* name); +block gen_op_var_bound(opcode op, block binder); +block gen_op_symbol(opcode op, const char* name); + +block gen_subexp(block a); +block gen_both(block a, block b); +block gen_collect(block expr); +block gen_else(block a, block b); + + +void block_append(block* b, block b2); +block block_join(block a, block b); +block block_bind(block binder, block body); + +struct bytecode* block_compile(struct symbol_table*, block); + +void block_free(block); diff --git a/c/execute.c b/c/execute.c new file mode 100644 index 0000000000..c06ed124d8 --- /dev/null +++ b/c/execute.c @@ -0,0 +1,376 @@ +#include +#include +#include +#include +#include + + +#include "opcode.h" +#include "bytecode.h" +#include "compile.h" + +#include "forkable_stack.h" + + +typedef struct { + json_t* value; + int pathidx; +} stackval; + + +json_t** pathbuf; +int pathsize; // number of allocated elements + + +int path_push(stackval sv, json_t* val) { + int pos = sv.pathidx; + assert(pos <= pathsize); + assert(pos >= 0); + if (pos == pathsize) { + pathsize = pathsize ? pathsize * 2 : 100; + pathbuf = realloc(pathbuf, sizeof(pathbuf[0]) * pathsize); + } + pathbuf[pos] = val; + return pos + 1; +} + +stackval stackval_replace(stackval value, json_t* newjs) { + stackval s = {newjs, value.pathidx}; + return s; +} + + +// Probably all uses of this function are bugs +stackval stackval_root(json_t* v) { + stackval s = {v, 0}; + return s; +} + +struct forkable_stack data_stk; +typedef struct { + FORKABLE_STACK_HEADER; + stackval sv; +} data_stk_elem; + +data_stk_elem* stk_push_frame(int n) { + return forkable_stack_push(&data_stk, sizeof(data_stk_elem) * n); +} + +void stk_pop_frame(int n) { + forkable_stack_pop(&data_stk, sizeof(data_stk_elem) * n); +} + +void stk_push(stackval val) { + data_stk_elem* s = stk_push_frame(1); + s->sv = val; +} + +stackval stk_pop() { + data_stk_elem* s = forkable_stack_peek(&data_stk, sizeof(data_stk_elem)); + stackval sv = s->sv; + forkable_stack_pop(&data_stk, sizeof(data_stk_elem)); + return sv; +} + + +typedef struct { + FORKABLE_STACK_HEADER; + struct bytecode* bc; + data_stk_elem* fp; + uint16_t* pc; +} call_stk_elem; +struct forkable_stack call_stk; + + +struct forkpoint { + FORKABLE_STACK_HEADER; + struct forkable_stack_state saved_data_stack; + struct forkable_stack_state saved_call_stack; +}; + +struct forkable_stack fork_stk; + +void stack_save(){ + struct forkpoint* fork = forkable_stack_push(&fork_stk, sizeof(struct forkpoint)); + forkable_stack_save(&data_stk, &fork->saved_data_stack); + forkable_stack_save(&call_stk, &fork->saved_call_stack); +} + +void stack_switch() { + struct forkpoint* fork = forkable_stack_peek(&fork_stk, sizeof(struct forkpoint)); + forkable_stack_switch(&data_stk, &fork->saved_data_stack); + forkable_stack_switch(&call_stk, &fork->saved_call_stack); +} + +void stack_restore(){ + struct forkpoint* fork = forkable_stack_peek(&fork_stk, sizeof(struct forkpoint)); + forkable_stack_restore(&data_stk, &fork->saved_data_stack); + forkable_stack_restore(&call_stk, &fork->saved_call_stack); + forkable_stack_pop(&fork_stk, sizeof(struct forkpoint)); +} + +#define stack_push stk_push +#define stack_pop stk_pop + +#define ON_BACKTRACK(op) ((op)+NUM_OPCODES) + +json_t* jq_next() { + assert(!forkable_stack_empty(&call_stk)); + call_stk_elem* ctx = forkable_stack_peek(&call_stk, sizeof(call_stk_elem)); + struct bytecode* bc = ctx->bc; + uint16_t* pc = ctx->pc; + data_stk_elem* fp = ctx->fp; + json_t* cpool = bc->constants; + + json_t* cfunc_input[MAX_CFUNCTION_ARGS] = {0}; + json_t* cfunc_output[MAX_CFUNCTION_ARGS] = {0}; + int backtracking = 0; + while (1) { + + dump_operation(bc, pc); + + uint16_t opcode = *pc++; + + printf("\t"); + const struct opcode_description* opdesc = opcode_describe(opcode); + data_stk_elem* param = forkable_stack_peek(&data_stk, sizeof(data_stk_elem)); + for (int i=0; istack_in; i++) { + json_dumpf(param->sv.value, stdout, JSON_ENCODE_ANY); + if (i < opdesc->stack_in-1) printf(" | "); + param = forkable_stack_peek_next(&data_stk, param, sizeof(data_stk_elem)); + } + + if (backtracking) { + printf("\t"); + opcode = ON_BACKTRACK(opcode); + backtracking = 0; + } + + printf("\n"); + + switch (opcode) { + default: assert(0 && "invalid instruction"); + + case LOADK: { + json_t* v = json_array_get(cpool, *pc++); + assert(v); + stack_push(stackval_replace(stack_pop(), v)); + break; + } + + case DUP: { + stackval v = stack_pop(); + stack_push(v); + stack_push(v); + break; + } + + case SWAP: { + stackval a = stack_pop(); + stackval b = stack_pop(); + stack_push(a); + stack_push(b); + break; + } + + case POP: { + stack_pop(); + break; + } + + case APPEND: { + // FIXME paths + json_t* v = stack_pop().value; + json_t* array = stack_pop().value; + array = json_copy(array); + json_array_append(array, v); + stack_push(stackval_root(array)); + break; + } + + case INSERT: { + stackval stktop = stack_pop(); + json_t* v = stack_pop().value; + json_t* k = stack_pop().value; + stackval objv = stack_pop(); + assert(json_is_string(k)); + assert(json_is_object(objv.value)); + json_t* obj = json_copy(objv.value); + json_object_set(obj, json_string_value(k), v); + assert(json_is_object(obj)); + stack_push(stackval_replace(objv, obj)); + stack_push(stktop); + break; + } + + case LOADV: { + uint16_t v = *pc++; + stack_push(stackval_replace(stack_pop(), fp[v].sv.value)); + break; + } + + case STOREV: { + uint16_t v = *pc++; + stackval val = stack_pop(); + printf("V%d = ", v); + json_dumpf(val.value, stdout, JSON_ENCODE_ANY); + printf("\n"); + fp[v].sv.value = val.value; + break; + } + +#if 0 + case DISPLAY: { + stackval sv = stack_pop(); + if (sv.value) { + json_dumpf(sv.value, stdout, JSON_ENCODE_ANY); + } else { + printf("#ERROR"); + } + printf(" - "); + for (int i = 0; i < sv.pathidx; i++) { + printf("/"); + json_dumpf(pathbuf[i], stdout, JSON_ENCODE_ANY); + } + printf("\n"); + return; + } +#endif + + case INDEX: { + stackval t = stack_pop(); + json_t* k = stack_pop().value; + stackval v; + if (json_is_string(k)) { + v.value = json_object_get(t.value, json_string_value(k)); + } else if (json_is_integer(k)) { + v.value = json_array_get(t.value, json_integer_value(k)); + } else { + assert(0 && "key neither string nor int"); + } + if (v.value) { + v.pathidx = path_push(t, k); + stack_push(v); + } else { + assert(0 && "bad lookup"); + } + break; + } + + + case JUMP: { + uint16_t offset = *pc++; + pc += offset; + break; + } + + case EACH: + stack_push(stackval_root(json_integer(0))); + // fallthrough + case ON_BACKTRACK(EACH): { + json_t* idxj = stack_pop().value; + int idx = json_integer_value(idxj); + stackval array = stack_pop(); + if (idx >= json_array_size(array.value)) { + goto do_backtrack; + } else { + stack_save(); + stack_push(array); + stack_push(stackval_root(json_integer(idx+1))); + call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem)); + ctx->bc = bc; + ctx->fp = fp; + ctx->pc = pc - 1; + stack_switch(); + + stackval sv = {json_array_get(array.value, idx), + path_push(array, json_integer(idx))}; + stack_push(sv); + } + break; + } + + do_backtrack: + case BACKTRACK: { + if (forkable_stack_empty(&fork_stk)) { + return 0; + } + stack_restore(); + call_stk_elem* ctx = forkable_stack_peek(&call_stk, sizeof(call_stk_elem)); + bc = ctx->bc; + pc = ctx->pc; + fp = ctx->fp; + cpool = bc->constants; + forkable_stack_pop(&call_stk, sizeof(call_stk_elem)); + backtracking = 1; + break; + } + + case FORK: { + stack_save(); + call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem)); + ctx->bc = bc; + ctx->fp = fp; + ctx->pc = pc - 1; + stack_switch(); + pc++; // skip offset this time + break; + } + case ON_BACKTRACK(FORK): { + uint16_t offset = *pc++; + pc += offset; + break; + } + + case YIELD: { + json_t* value = stack_pop().value; + call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem)); + ctx->bc = bc; + ctx->fp = fp; + ctx->pc = pc; + return value; + } + + case CALL_BUILTIN_1_1: { + stackval top = stack_pop(); + cfunc_input[0] = top.value; + struct cfunction* func = &bc->globals->cfunctions[*pc++]; + printf(" call %s\n", func->name); + func->fptr(cfunc_input, cfunc_output); + stack_push(stackval_replace(top, cfunc_output[0])); + break; + } + + + } + } +} + + +void jq_init(struct bytecode* bc, json_t* input) { + forkable_stack_init(&data_stk, sizeof(stackval) * 100); // FIXME: lower this number, see if it breaks + forkable_stack_init(&call_stk, 1024); // FIXME: lower this number, see if it breaks + forkable_stack_init(&fork_stk, 1024); // FIXME: lower this number, see if it breaks + + data_stk_elem* frame = stk_push_frame(bc->framesize); + + stack_push(stackval_root(input)); + call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem)); + ctx->pc = bc->code; + ctx->bc = bc; + ctx->fp = frame; +} + +void run_program(struct bytecode* bc) { + jq_init(bc, json_loadf(stdin, 0, 0)); + json_t* result; + while ((result = jq_next())) { + json_dumpf(result, stdout, JSON_ENCODE_ANY); + printf("\n"); + } + printf("end of results\n"); + + //assert(frame == stack_top_frame(bc->framesize)); + //stk_pop_frame(bc->framesize); + //assert(stackpos == 0); +} diff --git a/c/execute.h b/c/execute.h new file mode 100644 index 0000000000..b978a8e442 --- /dev/null +++ b/c/execute.h @@ -0,0 +1,14 @@ +#include "opcode.h" +#define MAX_CFUNCTION_PARAM 10 +typedef struct { + json_t* value; + int pathidx; +} stackval; + +typedef void (*cfunction_ptr)(stackval* input, stackval* output); + +struct cfunction { + cfunction_ptr fptr; + const char* name; + opcode callop; +}; diff --git a/c/forkable_stack.h b/c/forkable_stack.h new file mode 100644 index 0000000000..8043c426cb --- /dev/null +++ b/c/forkable_stack.h @@ -0,0 +1,94 @@ +#include +#include +#include + +struct forkable_stack_header { + int next; +}; + +#define FORKABLE_STACK_HEADER struct forkable_stack_header fk_header_ + +struct forkable_stack { + char* stk; + + // stk+length is just past end of allocated area + int length; + + // stk+pos is just past top-of-stack item + int pos; + + // everything before stk+savedlimit must be preserved + int savedlimit; +}; + +static void forkable_stack_check(struct forkable_stack* s) { + assert(s->stk); + assert(s->length > 0); + assert(s->pos >= 0 && s->pos <= s->length); + assert(s->savedlimit >= 0 && s->savedlimit <= s->length); +} + +static int forkable_stack_empty(struct forkable_stack* s) { + return s->pos == 0; +} + +static void forkable_stack_init(struct forkable_stack* s, size_t sz) { + s->stk = malloc(sz); + s->length = sz; + s->pos = 0; + s->savedlimit = 0; + forkable_stack_check(s); +} + +static void* forkable_stack_push(struct forkable_stack* s, size_t size) { + forkable_stack_check(s); + int curr = s->pos > s->savedlimit ? s->pos : s->savedlimit; + if (curr + size > s->length) { + s->length = (size + s->length + 1024) * 2; + s->stk = realloc(s->stk, s->length); + } + void* ret = (void*)(s->stk + curr); + ((struct forkable_stack_header*)ret)->next = s->pos; + s->pos = curr + size; + return ret; +} + +static void* forkable_stack_peek(struct forkable_stack* s, size_t size) { + assert(!forkable_stack_empty(s)); + return (void*)(s->stk + s->pos - size); +} + +static void* forkable_stack_peek_next(struct forkable_stack* s, void* top, size_t size) { + struct forkable_stack_header* elem = top; + return (void*)(s->stk + elem->next - size); +} + +static void forkable_stack_pop(struct forkable_stack* s, size_t size) { + struct forkable_stack_header* elem = forkable_stack_peek(s, size); + s->pos = elem->next; +} + +struct forkable_stack_state { + int prevpos, prevlimit; +}; + +static void forkable_stack_save(struct forkable_stack* s, struct forkable_stack_state* state) { + state->prevpos = s->pos; + state->prevlimit = s->savedlimit; + if (s->pos > s->savedlimit) s->savedlimit = s->pos; +} + +static void forkable_stack_switch(struct forkable_stack* s, struct forkable_stack_state* state) { + int curr_pos = s->pos; + s->pos = state->prevpos; + state->prevpos = curr_pos; + + int curr_limit = s->savedlimit; + if (curr_pos > curr_limit) s->savedlimit = curr_pos; + state->prevlimit = curr_limit; +} + +static void forkable_stack_restore(struct forkable_stack* s, struct forkable_stack_state* state) { + s->pos = state->prevpos; + s->savedlimit = state->prevlimit; +} diff --git a/c/lexer.l b/c/lexer.l new file mode 100644 index 0000000000..42103c4039 --- /dev/null +++ b/c/lexer.l @@ -0,0 +1,25 @@ +%{ +#include "compile.h" +#include "parser.tab.h" /* Generated by bison. */ +%} + +%option noyywrap nounput noinput nodefault +%option reentrant +%option bison-bridge bison-locations + +%% + +"==" { return EQ; } +"as" { return AS; } +"."|"="|";"|"["|"]"|","|":"|"("|")"|"{"|"}"|"|"|"+"|"\$" { return yytext[0];} + +[[:digit:]]+ { yylval->num = atoi(yytext); return NUMBER;} +[[:alnum:]]+ { yylval->str = strdup(yytext); return IDENT;} +[ \n\t]+ {} +%% +/* perhaps these should be calls... */ +/* +"true" { return TRUE; } +"false" { return FALSE; } +"null" { return NULL; } +*/ diff --git a/c/main.c b/c/main.c new file mode 100644 index 0000000000..5017d9fa9d --- /dev/null +++ b/c/main.c @@ -0,0 +1,82 @@ +#include +#include "compile.h" +#include "parser.tab.h" +#include "builtin.h" + +block compile(const char* str); + +void jq_init(struct bytecode* bc, json_t* value); +json_t* jq_next(); + +void run_program(struct bytecode* bc); + +int skipline(const char* buf) { + int p = 0; + while (buf[p] == ' ' || buf[p] == '\t') p++; + if (buf[p] == '#' || buf[p] == '\n' || buf[p] == 0) return 1; + return 0; +} + +void run_tests() { + FILE* testdata = fopen("testdata","r"); + char buf[4096]; + int tests = 0, passed = 0; + + while (1) { + if (!fgets(buf, sizeof(buf), testdata)) break; + if (skipline(buf)) continue; + printf("Testing %s\n", buf); + int pass = 1; + block program = compile(buf); + block_append(&program, gen_op_simple(YIELD)); + block_append(&program, gen_op_simple(BACKTRACK)); + struct bytecode* bc = block_compile(&builtins, program); + block_free(program); + fgets(buf, sizeof(buf), testdata); + json_t* input = json_loads(buf, JSON_DECODE_ANY, 0); + jq_init(bc, input); + + while (fgets(buf, sizeof(buf), testdata)) { + if (skipline(buf)) break; + json_t* expected = json_loads(buf, JSON_DECODE_ANY, 0); + json_t* actual = jq_next(); + if (!actual) { + printf("Insufficient results\n"); + pass = 0; + break; + } else if (!json_equal(expected, actual)) { + printf("Expected "); + json_dumpf(expected, stdout, JSON_ENCODE_ANY); + printf(", but got "); + json_dumpf(actual, stdout, JSON_ENCODE_ANY); + printf("\n"); + pass = 0; + break; + } + } + if (pass) { + json_t* extra = jq_next(); + if (extra) { + printf("Superfluous result: "); + json_dumpf(extra, stdout, JSON_ENCODE_ANY); + printf("\n"); + pass = 0; + } + } + tests++; + passed+=pass; + } + fclose(testdata); + printf("%d of %d tests passed\n", passed,tests); +} + +int main(int argc, char* argv[]) { + if (argc == 1) { run_tests(); return 0; } + block blk = compile(argv[1]); + block_append(&blk, block_join(gen_op_simple(YIELD), gen_op_simple(BACKTRACK))); + struct bytecode* bc = block_compile(&builtins, blk); + block_free(blk); + dump_disassembly(bc); + printf("\n"); + run_program(bc); +} diff --git a/c/opcode.c b/c/opcode.c new file mode 100644 index 0000000000..b14e0db622 --- /dev/null +++ b/c/opcode.c @@ -0,0 +1,27 @@ +#include "opcode.h" + +#define NONE 0 +#define CONSTANT (OP_HAS_IMMEDIATE | OP_HAS_CONSTANT) +#define VARIABLE (OP_HAS_IMMEDIATE | OP_HAS_VARIABLE) +#define BRANCH (OP_HAS_IMMEDIATE | OP_HAS_BRANCH) +#define CFUNC (OP_HAS_IMMEDIATE | OP_HAS_SYMBOL | OP_HAS_CFUNC) + +#define OP(name, imm, in, out) \ + {name, #name, imm, in, out}, + +static const struct opcode_description opcode_descriptions[] = { +#include "opcode_list.h" +}; + +static const struct opcode_description invalid_opcode_description = { + -1, "#INVALID", 0, 0, 0 +}; + + +const struct opcode_description* opcode_describe(opcode op) { + if ((int)op >= 0 && (int)op < NUM_OPCODES) { + return &opcode_descriptions[op]; + } else { + return &invalid_opcode_description; + } +} diff --git a/c/opcode.h b/c/opcode.h new file mode 100644 index 0000000000..797e6a96cd --- /dev/null +++ b/c/opcode.h @@ -0,0 +1,36 @@ +#ifndef OPCODE_H +#define OPCODE_H +typedef enum { +#define OP(name, imm, in, out) name, +#include "opcode_list.h" +#undef OP +} opcode; + +enum { + NUM_OPCODES = +#define OP(name, imm, in, out) +1 +#include "opcode_list.h" +#undef OP +}; + +enum { + OP_HAS_IMMEDIATE = 1, + OP_HAS_CONSTANT = 2, + OP_HAS_VARIABLE = 4, + OP_HAS_BRANCH = 8, + OP_HAS_SYMBOL = 16, + OP_HAS_CFUNC = 32 +}; +struct opcode_description { + opcode op; + const char* name; + int flags; + int stack_in, stack_out; +}; + +const struct opcode_description* opcode_describe(opcode op); + +static inline int opcode_length(opcode op) { + return 1 + (opcode_describe(op)->flags & OP_HAS_IMMEDIATE ? 1 : 0); +} +#endif diff --git a/c/opcode_list.h b/c/opcode_list.h new file mode 100644 index 0000000000..d88381b79d --- /dev/null +++ b/c/opcode_list.h @@ -0,0 +1,17 @@ +OP(LOADK, CONSTANT, 1, 1) +OP(DUP, NONE, 1, 2) +OP(SWAP, NONE, 2, 2) +OP(POP, NONE, 1, 0) +OP(LOADV, VARIABLE, 1, 1) +OP(STOREV, VARIABLE, 1, 0) +OP(INDEX, NONE, 2, 1) +//OP(DISPLAY, NONE, 1, 0) +OP(YIELD, NONE, 1, 0) +OP(EACH, NONE, 1, 1) +OP(FORK, BRANCH, 0, 0) +OP(JUMP, BRANCH, 0, 0) +OP(BACKTRACK, NONE, 0, 0) +OP(APPEND, NONE, 2, 1) +OP(INSERT, NONE, 4, 2) + +OP(CALL_BUILTIN_1_1, CFUNC, 1, 1) diff --git a/c/parser.y b/c/parser.y new file mode 100644 index 0000000000..e8721a17d8 --- /dev/null +++ b/c/parser.y @@ -0,0 +1,162 @@ +%{ +#include +#include +#include "compile.h" +%} + +%locations +%define api.pure +%union { + int num; + char* str; + block blk; +} + +%parse-param {block* answer} +%parse-param {yyscan_t lexer} +%lex-param {yyscan_t lexer} + + +%token IDENT +%token NUMBER + +%left '|' +%left ',' +%token EQ "==" +%token AS "as" +%nonassoc EQ +%left '+' + +%type Exp Term MkDict MkDictPair ExpD + +%{ +#include "lexer.yy.h" +void yyerror(YYLTYPE* loc, block* answer, yyscan_t lexer, const char *s){ + printf("ERROR: %s\n", s); +} + +static block gen_dictpair(block k, block v) { + block b = gen_subexp(k); + block_append(&b, gen_subexp(v)); + block_append(&b, gen_op_simple(INSERT)); + return b; +} + +static block gen_string(const char* str) { + return gen_op_const(LOADK, json_string(str)); +} + +static block gen_index(block obj, block key) { + return block_join(obj, block_join(gen_subexp(key), gen_op_simple(INDEX))); +} + +%} + +%% +program: Exp { *answer = $1; } + + +Exp: +Term "as" '$' IDENT '|' Exp { + $$ = gen_op_simple(DUP); + block_append(&$$, $1); + block_append(&$$, block_bind(gen_op_var_unbound(STOREV, $4), $6)); +} | + +Exp '|' Exp { + $$ = block_join($1, $3); +} | + +Exp ',' Exp { + $$ = gen_both($1, $3); +} | + +Term { + $$ = $1; +} + + +ExpD: +ExpD '|' ExpD { + $$ = block_join($1, $3); +} | + +Term { + $$ = $1; +} + + +Term: +'.' { + $$ = gen_noop(); +} | +Term '.' IDENT { + $$ = gen_index($1, gen_string($3)); +} | +'.' IDENT { + $$ = gen_index(gen_noop(), gen_string($2)); +} | +/* FIXME: string literals */ +Term '[' Exp ']' { + $$ = gen_index($1, $3); +} | +Term '[' ']' { + $$ = block_join($1, gen_op_simple(EACH)); +} | +NUMBER { + $$ = gen_op_const(LOADK, json_integer($1)); +} | +'(' Exp ')' { + $$ = $2; +} | +'[' Exp ']' { + $$ = gen_collect($2); +} | +'[' ']' { + $$ = gen_op_const(LOADK, json_array()); +} | +'{' MkDict '}' { + $$ = gen_subexp(gen_op_const(LOADK, json_object())); + block_append(&$$, $2); + block_append(&$$, gen_op_simple(POP)); +} | +IDENT { + $$ = gen_op_symbol(CALL_BUILTIN_1_1, $1); +} | +'$' IDENT { + $$ = gen_op_var_unbound(LOADV, $2); +} + +MkDict: +{ + $$=gen_noop(); +} +| +MkDictPair +{ $$ = $1; } +| MkDictPair ',' MkDict { $$=block_join($1, $3); } + +MkDictPair +: IDENT ':' ExpD { + $$ = gen_dictpair(gen_string($1), $3); + } +| IDENT { + $$ = gen_dictpair(gen_string($1), + gen_index(gen_noop(), gen_string($1))); + } +| '(' Exp ')' ':' ExpD { + $$ = gen_dictpair($2, $5); + } +%% + +block compile(const char* str) { + yyscan_t scanner; + YY_BUFFER_STATE buf; + block answer = gen_noop(); + yylex_init(&scanner); + buf = yy_scan_string(str, scanner); + yyparse(&answer, scanner); + yy_delete_buffer(buf, scanner); + yylex_destroy(scanner); + return answer; +}