Skip to content

Commit

Permalink
[ruby/prism] Add some encoding debugging to make testing easier
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton authored and matzbot committed Feb 23, 2024
1 parent ce8531f commit ec6532b
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 0 deletions.
43 changes: 43 additions & 0 deletions lib/prism/debug.rb
Expand Up @@ -202,5 +202,48 @@ def self.prism_locals(source)
# Parse the given source with prism and return the offsets recorded on the
# source object of the parse result.
def self.newlines(source)
  parse_result = Prism.parse(source)
  parsed_source = parse_result.source
  parsed_source.offsets
end

# A wrapper around prism's internal encoding data structures. It exists for
# reflection and debugging purposes only.
class Encoding
  # The name of the encoding, which can be passed to Encoding.find.
  attr_reader :name

  # Build a wrapper for the encoding with the given name. The second
  # argument records whether or not the encoding is a multibyte encoding.
  def initialize(name, multibyte)
    @multibyte = multibyte
    @name = name
  end

  # Whether or not this is a multibyte encoding.
  def multibyte?
    @multibyte
  end

  # Returns true if the first character in the source string is a valid
  # alphanumeric character for this encoding.
  def alnum?(source)
    Encoding._alnum?(name, source)
  end

  # Returns true if the first character in the source string is a valid
  # alphabetic character for this encoding.
  def alpha?(source)
    Encoding._alpha?(name, source)
  end

  # Returns true if the first character in the source string is a valid
  # uppercase character for this encoding.
  def upper?(source)
    Encoding._upper?(name, source)
  end

  # Returns the number of bytes taken up by the first character in the
  # source string if it is valid for this encoding, and 0 otherwise.
  def width(source)
    Encoding._width(name, source)
  end
end
end
end
85 changes: 85 additions & 0 deletions prism/extension.c
Expand Up @@ -21,6 +21,8 @@ VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
VALUE rb_cPrismParseResult;

// The Debug::Encoding class. It is assigned in Init_prism and used by
// encoding_all to instantiate one wrapper object per known encoding.
VALUE rb_cPrismDebugEncoding;

ID rb_option_id_filepath;
ID rb_option_id_encoding;
ID rb_option_id_line;
Expand Down Expand Up @@ -1102,6 +1104,80 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
return result;
}

/**
 * call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
 *
 * Build and return an array holding one Debug::Encoding instance for each of
 * the first PM_ENCODING_MAXIMUM entries of prism's encoding table. Every
 * instance is created from the entry's name and its multibyte flag.
 */
static VALUE
encoding_all(VALUE self) {
    VALUE result = rb_ary_new();

    const pm_encoding_t *entry = pm_encodings;
    const pm_encoding_t *limit = pm_encodings + PM_ENCODING_MAXIMUM;

    while (entry != limit) {
        // Arguments for Debug::Encoding#initialize: (name, multibyte).
        VALUE argv[2];
        argv[0] = rb_str_new_cstr(entry->name);
        argv[1] = entry->multibyte ? Qtrue : Qfalse;

        rb_ary_push(result, rb_class_new_instance(2, argv, rb_cPrismDebugEncoding));
        entry++;
    }

    return result;
}

/**
 * Look up the prism encoding whose name matches the given Ruby string.
 *
 * Raises a TypeError if the given value is not a String and an ArgumentError
 * if pm_encoding_find does not recognize the name. Otherwise returns the
 * matching encoding, which is never NULL.
 */
static const pm_encoding_t *
encoding_find(VALUE name) {
    // This is reachable from Ruby with arbitrary objects, and RSTRING_PTR and
    // RSTRING_LEN are only valid on String objects, so check the type first.
    Check_Type(name, T_STRING);

    const uint8_t *source = (const uint8_t *) RSTRING_PTR(name);
    size_t length = (size_t) RSTRING_LEN(name);

    const pm_encoding_t *encoding = pm_encoding_find(source, source + length);

    // Format the VALUE itself with PRIsVALUE rather than the raw buffer with
    // %s, so the message does not depend on the buffer being NUL-terminated
    // and is not cut short by embedded NUL bytes.
    if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %" PRIsVALUE, name); }

    return encoding;
}

/**
 * call-seq: Debug::Encoding._width(name, source) -> Integer
 *
 * Finds the encoding with the given name and returns the width in bytes of the
 * first character in the given string if it is valid in that encoding. If it
 * is not, this function returns 0.
 *
 * Raises a TypeError if source is not a String, and an ArgumentError if the
 * name does not match an encoding that prism knows about.
 */
static VALUE
encoding_char_width(VALUE self, VALUE name, VALUE value) {
    // RSTRING_PTR and RSTRING_LEN are only valid on String objects, so reject
    // anything else before touching the underlying buffer.
    Check_Type(value, T_STRING);

    const pm_encoding_t *encoding = encoding_find(name);

    // SIZET2NUM is used instead of ULONG2NUM because unsigned long can be
    // narrower than size_t (e.g. on 64-bit Windows).
    return SIZET2NUM(encoding->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)));
}

/**
 * call-seq: Debug::Encoding._alnum?(name, source) -> true | false
 *
 * Finds the encoding with the given name and returns true if the first
 * character in the given string is an alphanumeric character in that encoding.
 *
 * Raises a TypeError if source is not a String, and an ArgumentError if the
 * name does not match an encoding that prism knows about.
 */
static VALUE
encoding_alnum_char(VALUE self, VALUE name, VALUE value) {
    // RSTRING_PTR and RSTRING_LEN are only valid on String objects, so reject
    // anything else before touching the underlying buffer.
    Check_Type(value, T_STRING);

    const pm_encoding_t *encoding = encoding_find(name);

    // The callback reports a positive value when the character matches.
    return encoding->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
}

/**
 * call-seq: Debug::Encoding._alpha?(name, source) -> true | false
 *
 * Finds the encoding with the given name and returns true if the first
 * character in the given string is an alphabetic character in that encoding.
 *
 * Raises a TypeError if source is not a String, and an ArgumentError if the
 * name does not match an encoding that prism knows about.
 */
static VALUE
encoding_alpha_char(VALUE self, VALUE name, VALUE value) {
    // RSTRING_PTR and RSTRING_LEN are only valid on String objects, so reject
    // anything else before touching the underlying buffer.
    Check_Type(value, T_STRING);

    const pm_encoding_t *encoding = encoding_find(name);

    // The callback reports a positive value when the character matches.
    return encoding->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
}

/**
 * call-seq: Debug::Encoding._upper?(name, source) -> true | false
 *
 * Finds the encoding with the given name and returns true if the first
 * character in the given string is an uppercase character in that encoding.
 *
 * Raises a TypeError if source is not a String, and an ArgumentError if the
 * name does not match an encoding that prism knows about.
 */
static VALUE
encoding_isupper_char(VALUE self, VALUE name, VALUE value) {
    // RSTRING_PTR and RSTRING_LEN are only valid on String objects, so reject
    // anything else before touching the underlying buffer.
    Check_Type(value, T_STRING);

    const pm_encoding_t *encoding = encoding_find(name);

    return encoding->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse;
}

/******************************************************************************/
/* Initialization of the extension */
/******************************************************************************/
Expand Down Expand Up @@ -1182,6 +1258,15 @@ Init_prism(void) {
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);

// Next, define the functions that are exposed through the private
// Debug::Encoding class.
rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject);
rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0);
rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2);
rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2);
rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2);
rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2);

// Next, initialize the other APIs.
Init_prism_api_node();
Init_prism_pack();
Expand Down

0 comments on commit ec6532b

Please sign in to comment.