Skip to content

Commit 5acc38a

Browse files
authored
Remove non-ASCII source characters (#1787)
1 parent 519653a commit 5acc38a

File tree

3 files changed

+33
-24
lines changed

3 files changed

+33
-24
lines changed

src/regexp.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,18 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
116116
* To properly track everything, we're going to build a little state machine.
117117
* It looks something like the following:
118118
*
119-
* ┌───────┐ ┌─────────┐ ────────────┐
120-
* ──── lbrace ───> │ start │ ──── digit ───> │ minimum
121-
* └───────┘ └─────────┘ <─── digit ─┘
122-
*
123-
* ┌───────┐ rbrace
124-
* comma │ <───── comma ┌──── comma ───────┘
125-
* └───────┘ V V
126-
* ┌─────────┐ ┌─────────┐
127-
* └── digit ──> │ maximum │ ── rbrace ──> │| final |
128-
* └─────────┘ └─────────┘
129-
* ^
130-
* └─ digit ─┘
119+
* +-------+ +---------+ ------------+
120+
* ---- lbrace ---> | start | ---- digit ---> | minimum | |
121+
* +-------+ +---------+ <--- digit -+
122+
* | | |
123+
* +-------+ | | rbrace
124+
* | comma | <----- comma +---- comma -------+ |
125+
* +-------+ V V
126+
* | +---------+ +---------+
127+
* +-- digit --> | maximum | -- rbrace --> || final ||
128+
* +---------+ +---------+
129+
* | ^
130+
* +- digit -+
131131
*
132132
* Note that by the time we've hit this function, the lbrace has already been
133133
* consumed so we're in the start state.

templates/src/prettyprint.c.erb

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
<%# encoding: ASCII -%>
12
#include "prism/prettyprint.h"
23

34
static void
@@ -63,29 +64,29 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
6364
prettyprint_location(output_buffer, parser, &node->location);
6465
pm_buffer_append_string(output_buffer, ")\n", 2);
6566
<%- node.fields.each_with_index do |field, index| -%>
66-
<%- pointer, preadd = index == node.fields.length - 1 ? ["└── ", "    "] : ["├── ", "│   "] -%>
67+
<%- pointer, preadd, preadd_bytesize = index == node.fields.length - 1 ? ["\\xe2\\x94\\x94\\xe2\\x94\\x80\\xe2\\x94\\x80 ", "    ", 4] : ["\\xe2\\x94\\x9c\\xe2\\x94\\x80\\xe2\\x94\\x80 ", "\\xe2\\x94\\x82   ", 6] -%>
6768

6869
// <%= field.name %>
6970
{
7071
pm_buffer_concat(output_buffer, prefix_buffer);
71-
pm_buffer_append_string(output_buffer, "<%= pointer %><%= field.name %>:", <%= pointer.bytesize + field.name.length + 1 %>);
72+
pm_buffer_append_string(output_buffer, "<%= pointer %><%= field.name %>:", <%= 10 + field.name.length + 1 %>);
7273
<%- case field -%>
7374
<%- when Prism::NodeField -%>
7475
pm_buffer_append_byte(output_buffer, '\n');
7576

7677
size_t prefix_length = prefix_buffer->length;
77-
pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd.bytesize %>);
78+
pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd_bytesize %>);
7879
pm_buffer_concat(output_buffer, prefix_buffer);
7980
prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
8081
prefix_buffer->length = prefix_length;
8182
<%- when Prism::OptionalNodeField -%>
8283
if (cast-><%= field.name %> == NULL) {
83-
pm_buffer_append_string(output_buffer, " ∅\n", 5);
84+
pm_buffer_append_string(output_buffer, " \xe2\x88\x85\n", 5);
8485
} else {
8586
pm_buffer_append_byte(output_buffer, '\n');
8687

8788
size_t prefix_length = prefix_buffer->length;
88-
pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd.bytesize %>);
89+
pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd_bytesize %>);
8990
pm_buffer_concat(output_buffer, prefix_buffer);
9091
prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
9192
prefix_buffer->length = prefix_length;
@@ -100,15 +101,15 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
100101
size_t last_index = cast-><%= field.name %>.size;
101102
for (uint32_t index = 0; index < last_index; index++) {
102103
size_t prefix_length = prefix_buffer->length;
103-
pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd.bytesize %>);
104+
pm_buffer_append_string(prefix_buffer, "<%= preadd %>", <%= preadd_bytesize %>);
104105
pm_buffer_concat(output_buffer, prefix_buffer);
105106

106107
if (index == last_index - 1) {
107-
pm_buffer_append_string(output_buffer, "└── ", 10);
108+
pm_buffer_append_string(output_buffer, "\xe2\x94\x94\xe2\x94\x80\xe2\x94\x80 ", 10);
108109
pm_buffer_append_string(prefix_buffer, "    ", 4);
109110
} else {
110-
pm_buffer_append_string(output_buffer, "├── ", 10);
111-
pm_buffer_append_string(prefix_buffer, "│   ", 6);
111+
pm_buffer_append_string(output_buffer, "\xe2\x94\x9c\xe2\x94\x80\xe2\x94\x80 ", 10);
112+
pm_buffer_append_string(prefix_buffer, "\xe2\x94\x82   ", 6);
112113
}
113114

114115
prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>.nodes[index], prefix_buffer);
@@ -120,7 +121,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
120121
pm_buffer_append_byte(output_buffer, '\n');
121122
<%- when Prism::OptionalConstantField -%>
122123
if (cast-><%= field.name %> == 0) {
123-
pm_buffer_append_string(output_buffer, " ∅\n", 5);
124+
pm_buffer_append_string(output_buffer, " \xe2\x88\x85\n", 5);
124125
} else {
125126
pm_buffer_append_byte(output_buffer, ' ');
126127
prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
@@ -143,7 +144,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
143144
<%- when Prism::OptionalLocationField -%>
144145
pm_location_t *location = &cast-><%= field.name %>;
145146
if (location->start == NULL) {
146-
pm_buffer_append_string(output_buffer, " ∅\n", 5);
147+
pm_buffer_append_string(output_buffer, " \xe2\x88\x85\n", 5);
147148
} else {
148149
pm_buffer_append_byte(output_buffer, ' ');
149150
prettyprint_location(output_buffer, parser, location);
@@ -163,7 +164,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
163164
found = true;
164165
}
165166
<%- end -%>
166-
if (!found) pm_buffer_append_string(output_buffer, " ∅", 4);
167+
if (!found) pm_buffer_append_string(output_buffer, " \xe2\x88\x85", 4);
167168
pm_buffer_append_byte(output_buffer, '\n');
168169
<%- else -%>
169170
<%- raise -%>

templates/template.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,14 @@ def template(name, write_to: nil)
374374
write_to ||= File.expand_path("../#{name}", __dir__)
375375
contents = heading + erb.result_with_hash(locals)
376376

377+
if (extension == ".c" || extension == ".h") && !contents.ascii_only?
378+
# Enforce that we only have ASCII characters here. This is necessary
379+
# for some locales that only allow ASCII characters in C source files.
380+
contents.each_line.with_index(1) do |line, line_number|
381+
raise "Non-ASCII character on line #{line_number} of #{write_to}" unless line.ascii_only?
382+
end
383+
end
384+
377385
FileUtils.mkdir_p(File.dirname(write_to))
378386
File.write(write_to, contents)
379387
end

0 commit comments

Comments
 (0)