Skip to content

Commit

Permalink
[ruby/prism] Switch locals to use a hash
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton authored and matzbot committed Apr 5, 2024
1 parent 1f84e10 commit 358aeb1
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 17 deletions.
24 changes: 23 additions & 1 deletion prism/parser.h
Expand Up @@ -509,6 +509,28 @@ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL =
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = 0x2;
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = 0x4;

/**
* This tracks an individual local variable in a certain lexical context, as
* well as the number of times is it read.
*/
typedef struct {
pm_constant_id_t name;
uint32_t index;
uint32_t reads;
uint32_t hash;
} pm_local_t;

/**
* This is a set of local variables in a certain lexical context (method, class,
* module, etc.). We need to track how many times these variables are read in
* order to warn if they only get written.
*/
typedef struct pm_locals {
uint32_t size;
uint32_t capacity;
pm_local_t *locals;
} pm_locals_t;

/**
* This struct represents a node in a linked list of scopes. Some scopes can see
* into their parent scopes, while others cannot.
Expand All @@ -518,7 +540,7 @@ typedef struct pm_scope {
struct pm_scope *previous;

/** The IDs of the locals in the given scope. */
pm_constant_id_list_t locals;
pm_locals_t locals;

/**
* This is a bitfield that indicates the parameters that are being used in
Expand Down
200 changes: 184 additions & 16 deletions prism/prism.c
Expand Up @@ -672,6 +672,168 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)

/******************************************************************************/
/* Local variable-related functions */
/******************************************************************************/

static void
pm_locals_free(pm_locals_t *locals) {
if (locals->capacity > 0) {
xfree(locals->locals);
}
}

/**
* Use the mid-square method to hash the given constant id.
*/
static uint32_t
pm_locals_hash(pm_constant_id_t name) {
uint64_t square = (uint64_t) name * (uint64_t) name;

uint32_t num_digits = (uint32_t) floor(log10(square) + 1);
uint32_t start = num_digits / 2;
uint32_t end = start + 1;

return (uint32_t) (((uint64_t) ((square / pow(10, start))) % (uint64_t) pow(10, end)));
}

static void
pm_locals_rehash(pm_locals_t *locals) {
uint32_t next_capacity = locals->capacity == 0 ? 8 : (locals->capacity * 2);
assert(next_capacity > locals->capacity);

pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
if (next_locals == NULL) abort();

uint32_t mask = next_capacity - 1;
for (uint32_t index = 0; index < locals->capacity; index++) {
pm_local_t *local = &locals->locals[index];

if (local->name != PM_CONSTANT_ID_UNSET) {
uint32_t hash = local->hash;

while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
next_locals[hash & mask] = *local;
}
}

pm_locals_free(locals);
locals->locals = next_locals;
locals->capacity = next_capacity;
}

/**
* Add a new local to the set of locals. This will automatically rehash the
* locals if the size is greater than 3/4 of the capacity.
*
* Returns true if the local was added, and false if the local already exists.
*/
static bool
pm_locals_write(pm_locals_t *locals, pm_constant_id_t name) {
if (locals->size >= (locals->capacity / 4 * 3)) {
pm_locals_rehash(locals);
}

uint32_t mask = locals->capacity - 1;
uint32_t hash = pm_locals_hash(name);
uint32_t initial_hash = hash;

do {
pm_local_t *local = &locals->locals[hash & mask];

if (local->name == PM_CONSTANT_ID_UNSET) {
*local = (pm_local_t) {
.name = name,
.index = locals->size++,
.reads = 0,
.hash = hash
};
return true;
} else if (local->name == name) {
return false;
} else {
hash++;
}
} while ((hash & mask) != initial_hash);

assert(false && "unreachable");
}

/**
* Finds the index of a local variable in the locals set. If it is not found,
* this returns UINT32_MAX.
*/
static uint32_t
pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
if (locals->capacity == 0) return UINT32_MAX;

uint32_t mask = locals->capacity - 1;
uint32_t hash = pm_locals_hash(name);
uint32_t initial_hash = hash & mask;

do {
pm_local_t *local = &locals->locals[hash & mask];

if (local->name == PM_CONSTANT_ID_UNSET) {
return UINT32_MAX;
} else if (local->name == name) {
return hash & mask;
} else {
hash++;
}
} while ((hash & mask) != initial_hash);

return UINT32_MAX;
}

/**
* Called when a variable is read in a certain lexical context. Tracks the read
* by adding to the reads count.
*/
// static void
// pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
// uint32_t index = pm_locals_find(locals, name);
// assert(index != UINT32_MAX);

// pm_local_t *local = &locals->locals[index];
// assert(local->reads < UINT32_MAX);

// local->reads++;
// }

/**
* Called when a variable read is transformed into a variable write, because a
* write operator is found after the variable name.
*/
// static void
// pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
// uint32_t index = pm_locals_find(locals, name);
// assert(index != UINT32_MAX);

// pm_local_t *local = &locals->locals[index];
// assert(local->reads > 0);

// local->reads--;
// }

/**
* Write out the locals into the given list of constant ids in the correct
* order. This is used to set the list of locals on the nodes in the tree once
* we're sure no additional locals will be added to the set.
*/
static void
pm_locals_order(pm_locals_t *locals, pm_constant_id_list_t *list) {
pm_constant_id_list_init_capacity(list, locals->size);

for (uint32_t index = 0; index < locals->capacity; index++) {
pm_local_t *local = &locals->locals[index];

if (local->name != PM_CONSTANT_ID_UNSET) {
pm_constant_id_list_insert(list, (size_t) local->index, local->name);
}
}
}

/******************************************************************************/
/* Node-related functions */
/******************************************************************************/
Expand Down Expand Up @@ -7045,7 +7207,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
int depth = 0;

while (scope != NULL) {
if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
if (scope->closed) break;

scope = scope->previous;
Expand All @@ -7070,9 +7232,7 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
*/
static inline void
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
}
pm_locals_write(&parser->current_scope->locals, constant_id);
}

/**
Expand Down Expand Up @@ -7175,7 +7335,7 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
// whether it's already in the current scope.
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);

if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
// Add an error if the parameter doesn't start with _ and has been seen before
if ((name->start < name->end) && (*name->start != '_')) {
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
Expand All @@ -7186,14 +7346,13 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
}

/**
* Pop the current scope off the scope stack. Note that we specifically do not
* free the associated constant list because we assume that we have already
* transferred ownership of the list to the AST somewhere.
* Pop the current scope off the scope stack.
*/
static void
pm_parser_scope_pop(pm_parser_t *parser) {
pm_scope_t *scope = parser->current_scope;
parser->current_scope = scope->previous;
pm_locals_free(&scope->locals);
xfree(scope);
}

Expand Down Expand Up @@ -13849,7 +14008,8 @@ parse_block(pm_parser_t *parser) {
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
}

pm_constant_id_list_t locals = parser->current_scope->locals;
pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);
pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);

pm_parser_scope_pop(parser);
Expand Down Expand Up @@ -17173,7 +17333,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}

expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
pm_constant_id_list_t locals = parser->current_scope->locals;

pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);

pm_parser_scope_pop(parser);
pm_do_loop_stack_pop(parser);
Expand Down Expand Up @@ -17234,7 +17396,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
}

pm_constant_id_list_t locals = parser->current_scope->locals;
pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);

pm_parser_scope_pop(parser);
pm_do_loop_stack_pop(parser);
Expand Down Expand Up @@ -17509,7 +17672,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
end_keyword = parser->previous;
}

pm_constant_id_list_t locals = parser->current_scope->locals;
pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);

pm_parser_scope_pop(parser);
pm_parser_current_param_name_restore(parser, saved_param_name);
Expand Down Expand Up @@ -17774,7 +17938,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
statements = (pm_node_t *) parse_rescues_implicit_begin(parser, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE);
}

pm_constant_id_list_t locals = parser->current_scope->locals;
pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);

pm_parser_scope_pop(parser);
pm_parser_current_param_name_restore(parser, saved_param_name);

Expand Down Expand Up @@ -18541,7 +18707,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
}

pm_constant_id_list_t locals = parser->current_scope->locals;
pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);
pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);

pm_parser_scope_pop(parser);
Expand Down Expand Up @@ -19815,7 +19982,9 @@ parse_program(pm_parser_t *parser) {
if (!statements) {
statements = pm_statements_node_create(parser);
}
pm_constant_id_list_t locals = parser->current_scope->locals;

pm_constant_id_list_t locals;
pm_locals_order(&parser->current_scope->locals, &locals);
pm_parser_scope_pop(parser);

// If this is an empty file, then we're still going to parse all of the
Expand Down Expand Up @@ -20140,7 +20309,6 @@ pm_parser_free(pm_parser_t *parser) {
// assumed that ownership has transferred to the AST. However if we have
// scopes while we're freeing the parser, it's likely they came from
// eval scopes and we need to free them explicitly here.
pm_constant_id_list_free(&parser->current_scope->locals);
pm_parser_scope_pop(parser);
}

Expand Down
24 changes: 24 additions & 0 deletions prism/util/pm_constant_pool.c
Expand Up @@ -10,6 +10,18 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
list->capacity = 0;
}

/**
* Initialize a list of constant ids with a given capacity.
*/
void
pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
list->ids = xmalloc(sizeof(pm_constant_id_t) * capacity);
if (list->ids == NULL) abort();

list->size = 0;
list->capacity = capacity;
}

/**
* Append a constant id to a list of constant ids. Returns false if any
* potential reallocations fail.
Expand All @@ -26,6 +38,18 @@ pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
return true;
}

/**
* Insert a constant id into a list of constant ids at the specified index.
*/
void
pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
assert(index < list->capacity);
assert(list->ids[index] == PM_CONSTANT_ID_UNSET);

list->ids[index] = id;
list->size++;
}

/**
* Checks if the current constant id list includes the given constant id.
*/
Expand Down

0 comments on commit 358aeb1

Please sign in to comment.