From d1ce989829927a4215952983babadfd1df1b505f Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Fri, 23 Feb 2024 10:36:59 -0500
Subject: [PATCH] [ruby/prism] Duplicated hash keys

https://github.com/ruby/prism/commit/3e10c46c14
---
 prism/diagnostic.c                 |   1 +
 prism/diagnostic.h                 |   1 +
 prism/node.h                       |  18 +++
 prism/prism.c                      |  73 ++++++++--
 prism/prism.h                      |   1 +
 prism/static_literals.c            | 215 +++++++++++++++++++++++++++++
 prism/static_literals.h            | 109 +++++++++++++++
 prism/templates/src/node.c.erb     |  49 +++++--
 prism/util/pm_integer.c            |  23 +++
 prism/util/pm_integer.h            |  11 ++
 test/prism/newline_test.rb         |   2 +-
 test/prism/static_literals_test.rb |  68 +++++++++
 12 files changed, 545 insertions(+), 26 deletions(-)
 create mode 100644 prism/static_literals.c
 create mode 100644 prism/static_literals.h
 create mode 100644 test/prism/static_literals_test.rb

diff --git a/prism/diagnostic.c b/prism/diagnostic.c
index 2040387d807be0..1003ecd3393dec 100644
--- a/prism/diagnostic.c
+++ b/prism/diagnostic.c
@@ -306,6 +306,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS]     = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
     [PM_WARN_AMBIGUOUS_PREFIX_STAR]             = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
     [PM_WARN_AMBIGUOUS_SLASH]                   = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_DUPLICATED_HASH_KEY]               = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT },
     [PM_WARN_EQUAL_IN_CONDITIONAL]              = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
     [PM_WARN_END_IN_METHOD]                     = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT },
     [PM_WARN_FLOAT_OUT_OF_RANGE]                = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE }
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 7419c0e791ae52..85f15db65c5737 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -306,6 +306,7 @@ typedef enum {
     PM_WARN_AMBIGUOUS_SLASH,
     PM_WARN_EQUAL_IN_CONDITIONAL,
     PM_WARN_END_IN_METHOD,
+    PM_WARN_DUPLICATED_HASH_KEY,
     PM_WARN_FLOAT_OUT_OF_RANGE,
 
     // This is the number of diagnostic codes.
diff --git a/prism/node.h b/prism/node.h
index 8d1b6a599a3344..9c37c9decce569 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -10,6 +10,17 @@
 #include "prism/parser.h"
 #include "prism/util/pm_buffer.h"
 
+/**
+ * Attempts to grow the node list to the next size. If there is already
+ * capacity in the list, this function does nothing. Otherwise it reallocates
+ * the list to be twice as large as it was before. If the reallocation fails,
+ * this function returns false, otherwise it returns true.
+ *
+ * @param list The list to grow.
+ * @return True if the list was successfully grown, false otherwise.
+ */
+bool pm_node_list_grow(pm_node_list_t *list);
+
 /**
  * Append a new node onto the end of the node list.
  *
@@ -18,6 +29,13 @@
  */
 void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
 
+/**
+ * Free the internal memory associated with the given node list.
+ *
+ * @param list The list to free.
+ */
+void pm_node_list_free(pm_node_list_t *list);
+
 /**
  * Deallocate a node and all of its children.
  *
diff --git a/prism/prism.c b/prism/prism.c
index 8bfc6ed6d1ffc7..fe1ba8d971c482 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -11676,11 +11676,32 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
     return statements;
 }
 
+/**
+ * Record a hash key in the given set of static literals. When an equal key
+ * has already been recorded, append a warning about the duplicated key.
+ */
+static void
+pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+    const pm_node_t *previous = pm_static_literals_add(parser, literals, node);
+    if (previous == NULL) return;
+
+    // Warn at the earlier key, reporting the line of the key replacing it.
+    pm_diagnostic_list_append_format(
+        &parser->warning_list,
+        previous->location.start,
+        previous->location.end,
+        PM_WARN_DUPLICATED_HASH_KEY,
+        (int) (previous->location.end - previous->location.start),
+        previous->location.start,
+        pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+    );
+}
+
 /**
  * Parse all of the elements of a hash. returns true if a double splat was found.
  */
 static bool
-parse_assocs(pm_parser_t *parser, pm_node_t *node) {
+parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
     assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
     bool contains_keyword_splat = false;
 
@@ -11709,6 +11730,8 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
                 parser_lex(parser);
 
                 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
+                pm_hash_key_static_literals_add(parser, literals, key);
+
                 pm_token_t operator = not_provided(parser);
                 pm_node_t *value = NULL;
 
@@ -11738,8 +11761,16 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
             }
             default: {
                 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
-                pm_token_t operator;
 
+                // Hash keys that are strings are automatically frozen. We will
+                // mark that here.
+                if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
+                    pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+                }
+
+                pm_hash_key_static_literals_add(parser, literals, key);
+
+                pm_token_t operator;
                 if (pm_symbol_node_label_p(key)) {
                     operator = not_provided(parser);
                 } else {
@@ -11773,6 +11804,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
         // Otherwise by default we will exit out of this loop.
         break;
     }
+
     return contains_keyword_splat;
 }
 
@@ -11830,12 +11862,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
                 argument = (pm_node_t *) hash;
 
-                bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
-                parsed_bare_hash = true;
+                pm_static_literals_t literals = { 0 };
+                bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
+
                 parse_arguments_append(parser, arguments, argument);
                 if (contains_keyword_splat) {
                     pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
                 }
+
+                pm_static_literals_free(&literals);
+                parsed_bare_hash = true;
+
                 break;
             }
             case PM_TOKEN_UAMPERSAND: {
@@ -11925,10 +11962,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
 
                     pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
 
+                    // Create the set of static literals for this hash.
+                    pm_static_literals_t literals = { 0 };
+                    pm_hash_key_static_literals_add(parser, &literals, argument);
+
                     // Finish parsing the one we are part way through
                     pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
-
                     argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
+
                     pm_keyword_hash_node_elements_append(bare_hash, argument);
                     argument = (pm_node_t *) bare_hash;
 
@@ -11937,9 +11978,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                         token_begins_expression_p(parser->current.type) ||
                         match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
                     )) {
-                        contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
+                        contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
                     }
 
+                    pm_static_literals_free(&literals);
                     parsed_bare_hash = true;
                 } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
                     // TODO: Could we solve this with binding powers instead?
@@ -14661,13 +14703,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
                     }
 
-                    pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
-                    element = (pm_node_t *)hash;
+                    element = (pm_node_t *) pm_keyword_hash_node_create(parser);
+                    pm_static_literals_t literals = { 0 };
 
                     if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        parse_assocs(parser, (pm_node_t *) hash);
+                        parse_assocs(parser, &literals, element);
                     }
 
+                    pm_static_literals_free(&literals);
                     parsed_bare_hash = true;
                 } else {
                     element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -14678,6 +14721,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         }
 
                         pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
+                        pm_static_literals_t literals = { 0 };
+                        pm_hash_key_static_literals_add(parser, &literals, element);
 
                         pm_token_t operator;
                         if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -14690,11 +14735,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                         pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
                         pm_keyword_hash_node_elements_append(hash, assoc);
 
-                        element = (pm_node_t *)hash;
+                        element = (pm_node_t *) hash;
                         if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
-                            parse_assocs(parser, (pm_node_t *) hash);
+                            parse_assocs(parser, &literals, element);
                         }
 
+                        pm_static_literals_free(&literals);
                         parsed_bare_hash = true;
                     }
                 }
@@ -14840,10 +14886,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
         case PM_TOKEN_BRACE_LEFT: {
             pm_accepts_block_stack_push(parser, true);
             parser_lex(parser);
+
             pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
+            pm_static_literals_t literals = { 0 };
 
             if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
-                parse_assocs(parser, (pm_node_t *) node);
+                parse_assocs(parser, &literals, (pm_node_t *) node);
                 accept1(parser, PM_TOKEN_NEWLINE);
             }
 
@@ -14851,6 +14899,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
             pm_hash_node_closing_loc_set(node, &parser->previous);
 
+            pm_static_literals_free(&literals);
             return (pm_node_t *) node;
         }
         case PM_TOKEN_CHARACTER_LITERAL: {
diff --git a/prism/prism.h b/prism/prism.h
index e24dbf5cad06e8..7d9b96fa829e99 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -21,6 +21,7 @@
 #include "prism/parser.h"
 #include "prism/prettyprint.h"
 #include "prism/regexp.h"
+#include "prism/static_literals.h"
 #include "prism/version.h"
 
 #include <assert.h>
diff --git a/prism/static_literals.c b/prism/static_literals.c
new file mode 100644
index 00000000000000..0fab4e98a3c108
--- /dev/null
+++ b/prism/static_literals.c
@@ -0,0 +1,215 @@
+#include "prism/static_literals.h"
+
+/**
+ * Insert a node into the given sorted list. If an equal node was already in
+ * the list it is replaced and returned, otherwise this returns NULL.
+ */
+static pm_node_t *
+pm_node_list_insert(const pm_parser_t *parser, pm_node_list_t *list, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) {
+    // TODO: This would be much more efficient with a binary search.
+    size_t index = 0;
+    while (index < list->size) {
+        int result = compare(parser, list->nodes[index], node);
+
+        // If we find a match, then replace the node and return the old one.
+        if (result == 0) {
+            pm_node_t *replaced = list->nodes[index];
+            list->nodes[index] = node;
+            return replaced;
+        }
+
+        if (result > 0) break;
+        index++;
+    }
+
+    pm_node_list_grow(list);
+    memmove(&list->nodes[index + 1], &list->nodes[index], (list->size - index) * sizeof(pm_node_t *));
+
+    list->nodes[index] = node;
+    list->size++;
+
+    return NULL;
+}
+
+/**
+ * Compare two numeric values, yielding -1, 0, or 1 (arguments are re-evaluated).
+ */
+#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : ((left) > (right)) ? 1 : 0)
+
+/**
+ * Convert an IntegerNode or SourceLineNode into a (saturating) int64_t.
+ */
+static int64_t
+pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) {
+    if (PM_NODE_TYPE_P(node, PM_SOURCE_LINE_NODE)) {
+        return (int64_t) pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line;
+    }
+
+    if (!PM_NODE_TYPE_P(node, PM_INTEGER_NODE)) {
+        assert(false && "unreachable");
+        return 0;
+    }
+
+    const pm_integer_t *number = &((const pm_integer_node_t *) node)->value;
+    if (number->length > 0) return number->negative ? INT64_MIN : INT64_MAX;
+
+    const int64_t magnitude = (int64_t) number->head.value;
+    return number->negative ? -magnitude : magnitude;
+}
+
+/**
+ * Comparator for IntegerNode and SourceLineNode instances. When neither side
+ * is a SourceLineNode the integers are compared directly.
+ */
+static int
+pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    if (!PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) && !PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
+        const pm_integer_node_t *lhs = (const pm_integer_node_t *) left;
+        const pm_integer_node_t *rhs = (const pm_integer_node_t *) right;
+        return pm_integer_compare(&lhs->value, &rhs->value);
+    }
+
+    const int64_t lhs_value = pm_int64_value(parser, left);
+    const int64_t rhs_value = pm_int64_value(parser, right);
+    return PM_NUMERIC_COMPARISON(lhs_value, rhs_value);
+}
+
+/**
+ * Comparator for FloatNode instances, ordering them by their stored value.
+ */
+static int
+pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    const double lhs = ((const pm_float_node_t *) left)->value;
+    const double rhs = ((const pm_float_node_t *) right)->value;
+    return PM_NUMERIC_COMPARISON(lhs, rhs);
+}
+
+/**
+ * Comparator for numeric nodes: ordered by node type, then by wrapped value.
+ */
+static int
+pm_compare_number_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
+        return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
+    }
+
+    switch (PM_NODE_TYPE(left)) {
+        case PM_IMAGINARY_NODE:
+            return pm_compare_number_nodes(parser, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
+        case PM_RATIONAL_NODE:
+            return pm_compare_number_nodes(parser, ((const pm_rational_node_t *) left)->numeric, ((const pm_rational_node_t *) right)->numeric);
+        case PM_INTEGER_NODE:
+            return pm_compare_integer_nodes(parser, left, right);
+        case PM_FLOAT_NODE:
+            return pm_compare_float_nodes(parser, left, right);
+        default:
+            assert(false && "unreachable");
+            return 0;
+    }
+}
+
+/**
+ * Look up the pm_string_t that holds the contents of a string-like node.
+ */
+static const pm_string_t *
+pm_string_value(const pm_node_t *node) {
+    if (PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
+        return &((const pm_string_node_t *) node)->unescaped;
+    }
+    if (PM_NODE_TYPE_P(node, PM_SOURCE_FILE_NODE)) {
+        return &((const pm_source_file_node_t *) node)->filepath;
+    }
+    if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE)) {
+        return &((const pm_symbol_node_t *) node)->unescaped;
+    }
+    assert(false && "unreachable");
+    return NULL;
+}
+
+/**
+ * Comparator for nodes whose contents are available through pm_string_value.
+ */
+static int
+pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    // Both nodes are reduced to their underlying strings, which are then
+    // compared directly.
+    return pm_string_compare(pm_string_value(left), pm_string_value(right));
+}
+
+/**
+ * Comparator for RegularExpressionNode instances: contents first, then flags.
+ */
+static int
+pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    const pm_regular_expression_node_t *lhs = (const pm_regular_expression_node_t *) left;
+    const pm_regular_expression_node_t *rhs = (const pm_regular_expression_node_t *) right;
+
+    // The node flags only break ties between identical unescaped contents.
+    const int order = pm_string_compare(&lhs->unescaped, &rhs->unescaped);
+    if (order == 0) return PM_NUMERIC_COMPARISON(lhs->base.flags, rhs->base.flags);
+    return order;
+}
+
+#undef PM_NUMERIC_COMPARISON
+
+/**
+ * Add a node to the set of static literals, returning the equal node it replaces (or NULL).
+ */
+pm_node_t *
+pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+    if (!PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return NULL;
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE:
+        case PM_SOURCE_LINE_NODE:
+            return pm_node_list_insert(parser, &literals->integer_nodes, node, pm_compare_integer_nodes);
+        case PM_FLOAT_NODE:
+            return pm_node_list_insert(parser, &literals->float_nodes, node, pm_compare_float_nodes);
+        case PM_RATIONAL_NODE:
+            return pm_node_list_insert(parser, &literals->rational_nodes, node, pm_compare_number_nodes);
+        case PM_IMAGINARY_NODE:
+            return pm_node_list_insert(parser, &literals->imaginary_nodes, node, pm_compare_number_nodes);
+        case PM_STRING_NODE:
+        case PM_SOURCE_FILE_NODE:
+            return pm_node_list_insert(parser, &literals->string_nodes, node, pm_compare_string_nodes);
+        case PM_REGULAR_EXPRESSION_NODE:
+            return pm_node_list_insert(parser, &literals->regexp_nodes, node, pm_compare_regular_expression_nodes);
+        case PM_SYMBOL_NODE:
+            return pm_node_list_insert(parser, &literals->symbol_nodes, node, pm_compare_string_nodes);
+        case PM_TRUE_NODE: {
+            pm_node_t *duplicated = literals->true_node;
+            literals->true_node = node;
+            return duplicated;
+        }
+        case PM_FALSE_NODE: {
+            pm_node_t *duplicated = literals->false_node;
+            literals->false_node = node;
+            return duplicated;
+        }
+        case PM_NIL_NODE: {
+            pm_node_t *duplicated = literals->nil_node;
+            literals->nil_node = node;
+            return duplicated;
+        }
+        case PM_SOURCE_ENCODING_NODE: {
+            pm_node_t *duplicated = literals->source_encoding_node;
+            literals->source_encoding_node = node;
+            return duplicated;
+        }
+        default:
+            return NULL;
+    }
+}
+
+/**
+ * Free the memory of every node list in the given set (not the nodes in them).
+ */
+void
+pm_static_literals_free(pm_static_literals_t *literals) {
+    pm_node_list_free(&literals->integer_nodes);
+    pm_node_list_free(&literals->float_nodes);
+    pm_node_list_free(&literals->rational_nodes);
+    pm_node_list_free(&literals->imaginary_nodes);
+    pm_node_list_free(&literals->string_nodes);
+    pm_node_list_free(&literals->regexp_nodes);
+    pm_node_list_free(&literals->symbol_nodes);
+}
diff --git a/prism/static_literals.h b/prism/static_literals.h
new file mode 100644
index 00000000000000..837d355985c6a6
--- /dev/null
+++ b/prism/static_literals.h
@@ -0,0 +1,109 @@
+/**
+ * @file static_literals.h
+ *
+ * A set of static literal nodes that can be checked for duplicates.
+ */
+#ifndef PRISM_STATIC_LITERALS_H
+#define PRISM_STATIC_LITERALS_H
+
+#include "prism/defines.h"
+#include "prism/ast.h"
+#include "prism/node.h"
+#include "prism/parser.h"
+
+#include <assert.h>
+#include <stdbool.h>
+
+/**
+ * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
+ * to alert the user of potential issues. To do this, we keep a set of the nodes
+ * that have been seen so far, and compare whenever we find a new node.
+ *
+ * We bucket the nodes based on their type to minimize the number of comparisons
+ * that need to be performed.
+ */
+typedef struct {
+    /**
+     * This is the set of IntegerNode and SourceLineNode instances. We store
+     * them in a sorted list so that we can binary search through them to find
+     * duplicates.
+     */
+    pm_node_list_t integer_nodes;
+
+    /**
+     * This is the set of FloatNode instances. We store them in a sorted list so
+     * that we can binary search through them to find duplicates.
+     */
+    pm_node_list_t float_nodes;
+
+    /**
+     * This is the set of RationalNode instances. We store them in a sorted list
+     * that is searched linearly to find duplicates.
+     */
+    pm_node_list_t rational_nodes;
+
+    /**
+     * This is the set of ImaginaryNode instances. We store them in a flat list
+     * that must be searched linearly.
+     */
+    pm_node_list_t imaginary_nodes;
+
+    /**
+     * This is the set of StringNode and SourceFileNode instances. We store them
+     * in a sorted list so that we can binary search through them to find
+     * duplicates.
+     */
+    pm_node_list_t string_nodes;
+
+    /**
+     * This is the set of RegularExpressionNode instances. We store them in a
+     * sorted list so that we can binary search through them to find duplicates.
+     */
+    pm_node_list_t regexp_nodes;
+
+    /**
+     * This is the set of SymbolNode instances. We store them in a sorted list
+     * so that we can binary search through them to find duplicates.
+     */
+    pm_node_list_t symbol_nodes;
+
+    /**
+     * A pointer to the last TrueNode instance that was inserted, or NULL.
+     */
+    pm_node_t *true_node;
+
+    /**
+     * A pointer to the last FalseNode instance that was inserted, or NULL.
+     */
+    pm_node_t *false_node;
+
+    /**
+     * A pointer to the last NilNode instance that was inserted, or NULL.
+     */
+    pm_node_t *nil_node;
+
+    /**
+     * A pointer to the last SourceEncodingNode instance that was inserted, or
+     * NULL.
+     */
+    pm_node_t *source_encoding_node;
+} pm_static_literals_t;
+
+/**
+ * Add a node to the set of static literals.
+ *
+ * @param parser The parser that created the node.
+ * @param literals The set of static literals to add the node to.
+ * @param node The node to add to the set.
+ * @return A pointer to the node that is being overwritten, if there is one.
+ */
+pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node);
+
+/**
+ * Free the internal memory associated with the given static literals set.
+ *
+ * @param literals The set of static literals to free.
+ */
+void pm_static_literals_free(pm_static_literals_t *literals);
+
+#endif
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index 89c73451e8e457..554645c3e78629 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -17,32 +17,55 @@ pm_node_list_memsize(pm_node_list_t *node_list, pm_memsize_t *memsize) {
 }
 
 /**
- * Append a new node onto the end of the node list.
+ * Attempts to grow the node list to the next size. If there is already
+ * capacity in the list, this function does nothing. Otherwise it reallocates
+ * the list to be twice as large as it was before. If the reallocation fails,
+ * this function returns false, otherwise it returns true.
  */
-void
-pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
+bool
+pm_node_list_grow(pm_node_list_t *list) {
     if (list->size == list->capacity) {
         list->capacity = list->capacity == 0 ? 4 : list->capacity * 2;
         list->nodes = (pm_node_t **) realloc(list->nodes, sizeof(pm_node_t *) * list->capacity);
+        return list->nodes != NULL;
+    }
+    return true;
+}
+
+/**
+ * Append a new node onto the end of the node list.
+ */
+void
+pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
+    if (pm_node_list_grow(list)) {
+        list->nodes[list->size++] = node;
+    }
+}
+
+/**
+ * Free the internal memory associated with the given node list.
+ */
+void
+pm_node_list_free(pm_node_list_t *list) {
+    if (list->capacity > 0) {
+        free(list->nodes);
+        *list = (pm_node_list_t) { 0 };
     }
-    list->nodes[list->size++] = node;
 }
 
 PRISM_EXPORTED_FUNCTION void
 pm_node_destroy(pm_parser_t *parser, pm_node_t *node);
 
 /**
- * Deallocate the inner memory of a list of nodes. The parser argument is not
- * used, but is here for the future possibility of pre-allocating memory pools.
+ * Destroy the nodes that are contained within the given node list.
  */
 static void
-pm_node_list_free(pm_parser_t *parser, pm_node_list_t *list) {
-    if (list->capacity > 0) {
-        for (size_t index = 0; index < list->size; index++) {
-            pm_node_destroy(parser, list->nodes[index]);
-        }
-        free(list->nodes);
+pm_node_list_destroy(pm_parser_t *parser, pm_node_list_t *list) {
+    for (size_t index = 0; index < list->size; index++) {
+        pm_node_destroy(parser, list->nodes[index]);
     }
+
+    pm_node_list_free(list);
 }
 
 /**
@@ -71,7 +94,7 @@ pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
             <%- when Prism::StringField -%>
             pm_string_free(&cast-><%= field.name %>);
             <%- when Prism::NodeListField -%>
-            pm_node_list_free(parser, &cast-><%= field.name %>);
+            pm_node_list_destroy(parser, &cast-><%= field.name %>);
             <%- when Prism::ConstantListField -%>
             pm_constant_id_list_free(&cast-><%= field.name %>);
             <%- when Prism::IntegerField -%>
diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c
index f08078356a531a..720dd608727acd 100644
--- a/prism/util/pm_integer.c
+++ b/prism/util/pm_integer.c
@@ -152,6 +152,29 @@ pm_integer_memsize(const pm_integer_t *integer) {
     return sizeof(pm_integer_t) + integer->length * sizeof(pm_integer_word_t);
 }
 
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer. Signs are honored, and -0 is equal to 0.
+ */
+int
+pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
+    // A zero magnitude is never treated as negative so that -0 matches 0.
+    const int left_negative = left->negative && (left->length > 0 || left->head.value != 0);
+    const int right_negative = right->negative && (right->length > 0 || right->head.value != 0);
+
+    if (left_negative != right_negative) return left_negative ? -1 : 1;
+    const int sign = left_negative ? -1 : 1;
+
+    if (left->length != right->length) return (left->length < right->length) ? -sign : sign;
+    const pm_integer_word_t *left_word = &left->head, *right_word = &right->head;
+    for (; left_word != NULL && right_word != NULL; left_word = left_word->next, right_word = right_word->next) {
+        if (left_word->value < right_word->value) return -sign;
+        if (left_word->value > right_word->value) return sign;
+    }
+    return 0;
+}
+
 /**
  * Recursively destroy the linked list of an integer.
  */
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
index 5525adabb03bb8..a80db633bbebd1 100644
--- a/prism/util/pm_integer.h
+++ b/prism/util/pm_integer.h
@@ -93,6 +93,17 @@ PRISM_EXPORTED_FUNCTION void pm_integer_parse(pm_integer_t *integer, pm_integer_
  */
 size_t pm_integer_memsize(const pm_integer_t *integer);
 
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ *
+ * @param left The left integer to compare.
+ * @param right The right integer to compare.
+ * @return The result of the comparison.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
 /**
  * Free the internal memory of an integer. This memory will only be allocated if
  * the integer exceeds the size of a single node in the linked list.
diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb
index eea69ec5e12f3d..e9975b346e8e31 100644
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@@ -7,7 +7,7 @@
 module Prism
   class NewlineTest < TestCase
     base = File.expand_path("../", __FILE__)
-    filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb errors_test.rb parser_test.rb unescape_test.rb]
+    filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb errors_test.rb parser_test.rb static_literals_test.rb unescape_test.rb]
 
     filepaths.each do |relative|
       define_method("test_newline_flags_#{relative}") do
diff --git a/test/prism/static_literals_test.rb b/test/prism/static_literals_test.rb
new file mode 100644
index 00000000000000..a35bb1cd49c43e
--- /dev/null
+++ b/test/prism/static_literals_test.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+  class StaticLiteralsTest < TestCase
+    def test_static_literals
+      assert_warning("1")
+      assert_warning("0xA", "10")
+      assert_warning("0o10", "8")
+      assert_warning("0b10", "2")
+      assert_warning("1_000")
+      assert_warning((2**32).to_s(10), "0x#{(2**32).to_s(16)}")
+      assert_warning((2**64).to_s(10), "0x#{(2**64).to_s(16)}")
+
+      assert_warning("__LINE__", "2")
+      assert_warning("3", "__LINE__")
+
+      assert_warning("1.0")
+      assert_warning("1e2", "100.0")
+
+      assert_warning("1r")
+      assert_warning("1.0r")
+
+      assert_warning("1i")
+      assert_warning("1.0i")
+
+      assert_warning("1ri")
+      assert_warning("1.0ri")
+
+      assert_warning("\"#{__FILE__}\"")
+      assert_warning("\"foo\"")
+      assert_warning("\"#{__FILE__}\"", "__FILE__")
+
+      assert_warning("/foo/")
+      refute_warning("/foo/", "/foo/i")
+
+      assert_warning(":foo")
+      assert_warning("%s[foo]")
+
+      assert_warning("true")
+      assert_warning("false")
+      assert_warning("nil")
+      assert_warning("__ENCODING__")
+    end
+
+    private
+
+    def parse_warning(left, right)
+      source = <<~RUBY
+        {
+          #{left} => 1,
+          #{right} => 2
+        }
+      RUBY
+
+      Prism.parse(source, filepath: __FILE__).warnings.first
+    end
+
+    def assert_warning(left, right = left)
+      assert_match %r{key #{Regexp.escape(left)} .+ line 3}, parse_warning(left, right)&.message
+    end
+
+    def refute_warning(left, right)
+      assert_nil parse_warning(left, right)
+    end
+  end
+end