Skip to content

Commit f389460

Browse files
committed
Switch locals to use a hash
1 parent 6a15e47 commit f389460

File tree

4 files changed

+248
-17
lines changed

4 files changed

+248
-17
lines changed

include/prism/parser.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,28 @@ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL =
509509
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = 0x2;
510510
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = 0x4;
511511

512+
/**
513+
* This tracks an individual local variable in a certain lexical context, as
514+
* well as the number of times it is read.
515+
*/
516+
typedef struct {
517+
/** The constant id that names this local. */
pm_constant_id_t name;
518+
/** The insertion order of this local: the size of the set when it was added. */
uint32_t index;
519+
/** The number of reads recorded for this local; starts at 0 when added. */
uint32_t reads;
520+
/** The hash recorded for this local when it was added; used when rehashing. */
uint32_t hash;
521+
} pm_local_t;
522+
523+
/**
524+
* This is a set of local variables in a certain lexical context (method, class,
525+
* module, etc.). We need to track how many times these variables are read in
526+
* order to warn if they only get written.
527+
*/
528+
typedef struct pm_locals {
529+
/** The number of locals that have been added to the set. */
uint32_t size;
530+
/** The number of slots allocated in the locals array (0 until first use). */
uint32_t capacity;
531+
/** The array of slots; a slot whose name is unset is empty. */
pm_local_t *locals;
532+
} pm_locals_t;
533+
512534
/**
513535
* This struct represents a node in a linked list of scopes. Some scopes can see
514536
* into their parent scopes, while others cannot.
@@ -518,7 +540,7 @@ typedef struct pm_scope {
518540
struct pm_scope *previous;
519541

520542
/** The IDs of the locals in the given scope. */
521-
pm_constant_id_list_t locals;
543+
pm_locals_t locals;
522544

523545
/**
524546
* This is a bitfield that indicates the parameters that are being used in

include/prism/util/pm_constant_pool.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ typedef struct {
5151
*/
5252
void pm_constant_id_list_init(pm_constant_id_list_t *list);
5353

54+
/**
55+
* Initialize a list of constant ids with a given capacity.
56+
*
57+
* @param list The list to initialize.
58+
* @param capacity The initial capacity of the list.
59+
*/
60+
void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
61+
5462
/**
5563
* Append a constant id to a list of constant ids. Returns false if any
5664
* potential reallocations fail.
@@ -61,6 +69,15 @@ void pm_constant_id_list_init(pm_constant_id_list_t *list);
6169
*/
6270
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
6371

72+
/**
73+
* Insert a constant id into a list of constant ids at the specified index.
74+
*
75+
* @param list The list to insert into.
76+
* @param index The index at which to insert.
77+
* @param id The id to insert.
78+
*/
79+
void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
80+
6481
/**
6582
* Checks if the current constant id list includes the given constant id.
6683
*

src/prism.c

Lines changed: 184 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,168 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
672672
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
673673
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
674674

675+
/******************************************************************************/
676+
/* Local variable-related functions */
677+
/******************************************************************************/
678+
679+
static void
680+
pm_locals_free(pm_locals_t *locals) {
681+
if (locals->capacity > 0) {
682+
xfree(locals->locals);
683+
}
684+
}
685+
686+
/**
687+
* Use the mid-square method to hash the given constant id.
688+
*/
689+
static uint32_t
690+
pm_locals_hash(pm_constant_id_t name) {
691+
uint64_t square = (uint64_t) name * (uint64_t) name;
692+
693+
uint32_t num_digits = (uint32_t) floor(log10(square) + 1);
694+
uint32_t start = num_digits / 2;
695+
uint32_t end = start + 1;
696+
697+
return (uint32_t) (((uint64_t) ((square / pow(10, start))) % (uint64_t) pow(10, end)));
698+
}
699+
700+
static void
701+
pm_locals_rehash(pm_locals_t *locals) {
702+
uint32_t next_capacity = locals->capacity == 0 ? 8 : (locals->capacity * 2);
703+
assert(next_capacity > locals->capacity);
704+
705+
pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
706+
if (next_locals == NULL) abort();
707+
708+
uint32_t mask = next_capacity - 1;
709+
for (uint32_t index = 0; index < locals->capacity; index++) {
710+
pm_local_t *local = &locals->locals[index];
711+
712+
if (local->name != PM_CONSTANT_ID_UNSET) {
713+
uint32_t hash = local->hash;
714+
715+
while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
716+
next_locals[hash & mask] = *local;
717+
}
718+
}
719+
720+
pm_locals_free(locals);
721+
locals->locals = next_locals;
722+
locals->capacity = next_capacity;
723+
}
724+
725+
/**
726+
* Add a new local to the set of locals. This will automatically rehash the
727+
* locals if the size is greater than 3/4 of the capacity.
728+
*
729+
* Returns true if the local was added, and false if the local already exists.
730+
*/
731+
static bool
732+
pm_locals_write(pm_locals_t *locals, pm_constant_id_t name) {
733+
if (locals->size >= (locals->capacity / 4 * 3)) {
734+
pm_locals_rehash(locals);
735+
}
736+
737+
uint32_t mask = locals->capacity - 1;
738+
uint32_t hash = pm_locals_hash(name);
739+
uint32_t initial_hash = hash;
740+
741+
do {
742+
pm_local_t *local = &locals->locals[hash & mask];
743+
744+
if (local->name == PM_CONSTANT_ID_UNSET) {
745+
*local = (pm_local_t) {
746+
.name = name,
747+
.index = locals->size++,
748+
.reads = 0,
749+
.hash = hash
750+
};
751+
return true;
752+
} else if (local->name == name) {
753+
return false;
754+
} else {
755+
hash++;
756+
}
757+
} while ((hash & mask) != initial_hash);
758+
759+
assert(false && "unreachable");
760+
}
761+
762+
/**
763+
* Finds the index of a local variable in the locals set. If it is not found,
764+
* this returns UINT32_MAX.
765+
*/
766+
static uint32_t
767+
pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
768+
if (locals->capacity == 0) return UINT32_MAX;
769+
770+
uint32_t mask = locals->capacity - 1;
771+
uint32_t hash = pm_locals_hash(name);
772+
uint32_t initial_hash = hash & mask;
773+
774+
do {
775+
pm_local_t *local = &locals->locals[hash & mask];
776+
777+
if (local->name == PM_CONSTANT_ID_UNSET) {
778+
return UINT32_MAX;
779+
} else if (local->name == name) {
780+
return hash & mask;
781+
} else {
782+
hash++;
783+
}
784+
} while ((hash & mask) != initial_hash);
785+
786+
return UINT32_MAX;
787+
}
788+
789+
/**
790+
* Called when a variable is read in a certain lexical context. Tracks the read
791+
* by adding to the reads count.
792+
*/
793+
// static void
794+
// pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
795+
// uint32_t index = pm_locals_find(locals, name);
796+
// assert(index != UINT32_MAX);
797+
798+
// pm_local_t *local = &locals->locals[index];
799+
// assert(local->reads < UINT32_MAX);
800+
801+
// local->reads++;
802+
// }
803+
804+
/**
805+
* Called when a variable read is transformed into a variable write, because a
806+
* write operator is found after the variable name.
807+
*/
808+
// static void
809+
// pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
810+
// uint32_t index = pm_locals_find(locals, name);
811+
// assert(index != UINT32_MAX);
812+
813+
// pm_local_t *local = &locals->locals[index];
814+
// assert(local->reads > 0);
815+
816+
// local->reads--;
817+
// }
818+
819+
/**
820+
* Write out the locals into the given list of constant ids in the correct
821+
* order. This is used to set the list of locals on the nodes in the tree once
822+
* we're sure no additional locals will be added to the set.
823+
*/
824+
static void
825+
pm_locals_order(pm_locals_t *locals, pm_constant_id_list_t *list) {
826+
pm_constant_id_list_init_capacity(list, locals->size);
827+
828+
for (uint32_t index = 0; index < locals->capacity; index++) {
829+
pm_local_t *local = &locals->locals[index];
830+
831+
if (local->name != PM_CONSTANT_ID_UNSET) {
832+
pm_constant_id_list_insert(list, (size_t) local->index, local->name);
833+
}
834+
}
835+
}
836+
675837
/******************************************************************************/
676838
/* Node-related functions */
677839
/******************************************************************************/
@@ -7045,7 +7207,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
70457207
int depth = 0;
70467208

70477209
while (scope != NULL) {
7048-
if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
7210+
if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
70497211
if (scope->closed) break;
70507212

70517213
scope = scope->previous;
@@ -7070,9 +7232,7 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
70707232
*/
70717233
static inline void
70727234
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
7073-
if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
7074-
pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
7075-
}
7235+
pm_locals_write(&parser->current_scope->locals, constant_id);
70767236
}
70777237

70787238
/**
@@ -7175,7 +7335,7 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
71757335
// whether it's already in the current scope.
71767336
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
71777337

7178-
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
7338+
if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
71797339
// Add an error if the parameter doesn't start with _ and has been seen before
71807340
if ((name->start < name->end) && (*name->start != '_')) {
71817341
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
@@ -7186,14 +7346,13 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
71867346
}
71877347

71887348
/**
7189-
* Pop the current scope off the scope stack. Note that we specifically do not
7190-
* free the associated constant list because we assume that we have already
7191-
* transferred ownership of the list to the AST somewhere.
7349+
* Pop the current scope off the scope stack.
71927350
*/
71937351
static void
71947352
pm_parser_scope_pop(pm_parser_t *parser) {
71957353
pm_scope_t *scope = parser->current_scope;
71967354
parser->current_scope = scope->previous;
7355+
pm_locals_free(&scope->locals);
71977356
xfree(scope);
71987357
}
71997358

@@ -13849,7 +14008,8 @@ parse_block(pm_parser_t *parser) {
1384914008
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
1385014009
}
1385114010

13852-
pm_constant_id_list_t locals = parser->current_scope->locals;
14011+
pm_constant_id_list_t locals;
14012+
pm_locals_order(&parser->current_scope->locals, &locals);
1385314013
pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
1385414014

1385514015
pm_parser_scope_pop(parser);
@@ -17173,7 +17333,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1717317333
}
1717417334

1717517335
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
17176-
pm_constant_id_list_t locals = parser->current_scope->locals;
17336+
17337+
pm_constant_id_list_t locals;
17338+
pm_locals_order(&parser->current_scope->locals, &locals);
1717717339

1717817340
pm_parser_scope_pop(parser);
1717917341
pm_do_loop_stack_pop(parser);
@@ -17234,7 +17396,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1723417396
pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
1723517397
}
1723617398

17237-
pm_constant_id_list_t locals = parser->current_scope->locals;
17399+
pm_constant_id_list_t locals;
17400+
pm_locals_order(&parser->current_scope->locals, &locals);
1723817401

1723917402
pm_parser_scope_pop(parser);
1724017403
pm_do_loop_stack_pop(parser);
@@ -17509,7 +17672,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1750917672
end_keyword = parser->previous;
1751017673
}
1751117674

17512-
pm_constant_id_list_t locals = parser->current_scope->locals;
17675+
pm_constant_id_list_t locals;
17676+
pm_locals_order(&parser->current_scope->locals, &locals);
1751317677

1751417678
pm_parser_scope_pop(parser);
1751517679
pm_parser_current_param_name_restore(parser, saved_param_name);
@@ -17774,7 +17938,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1777417938
statements = (pm_node_t *) parse_rescues_implicit_begin(parser, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE);
1777517939
}
1777617940

17777-
pm_constant_id_list_t locals = parser->current_scope->locals;
17941+
pm_constant_id_list_t locals;
17942+
pm_locals_order(&parser->current_scope->locals, &locals);
17943+
1777817944
pm_parser_scope_pop(parser);
1777917945
pm_parser_current_param_name_restore(parser, saved_param_name);
1778017946

@@ -18541,7 +18707,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1854118707
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
1854218708
}
1854318709

18544-
pm_constant_id_list_t locals = parser->current_scope->locals;
18710+
pm_constant_id_list_t locals;
18711+
pm_locals_order(&parser->current_scope->locals, &locals);
1854518712
pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
1854618713

1854718714
pm_parser_scope_pop(parser);
@@ -19815,7 +19982,9 @@ parse_program(pm_parser_t *parser) {
1981519982
if (!statements) {
1981619983
statements = pm_statements_node_create(parser);
1981719984
}
19818-
pm_constant_id_list_t locals = parser->current_scope->locals;
19985+
19986+
pm_constant_id_list_t locals;
19987+
pm_locals_order(&parser->current_scope->locals, &locals);
1981919988
pm_parser_scope_pop(parser);
1982019989

1982119990
// If this is an empty file, then we're still going to parse all of the
@@ -20140,7 +20309,6 @@ pm_parser_free(pm_parser_t *parser) {
2014020309
// assumed that ownership has transferred to the AST. However if we have
2014120310
// scopes while we're freeing the parser, it's likely they came from
2014220311
// eval scopes and we need to free them explicitly here.
20143-
pm_constant_id_list_free(&parser->current_scope->locals);
2014420312
pm_parser_scope_pop(parser);
2014520313
}
2014620314

0 commit comments

Comments
 (0)