diff --git a/bin/template b/bin/template
index 290c872a08..bb48a395d9 100755
--- a/bin/template
+++ b/bin/template
@@ -193,9 +193,9 @@ nodes = config.fetch("nodes").map { |node| NodeType.new(node) }.sort_by(&:name)
 tokens = config.fetch("tokens").map { |token| Token.new(token) }
 
 template("../ext/yarp/extension.c", nodes, cmark)
-template("../ext/yarp/yarp.h", nodes, cmark)
 template("../ext/yarp/yarp.c", nodes, cmark)
 template("../lib/yarp.rb", nodes, rbmark)
 
 pure_template(template: 'bin/templates/token_type.h.erb', write_to: 'ext/yarp/token_type.h', locals: { tokens: tokens })
 pure_template(template: 'bin/templates/token_type.c.erb', write_to: 'ext/yarp/token_type.c', locals: { tokens: tokens })
+pure_template(template: 'bin/templates/node.h.erb', write_to: 'ext/yarp/node.h', locals: { nodes: nodes })
diff --git a/bin/templates/yarp.h.erb b/bin/templates/node.h.erb
similarity index 70%
rename from bin/templates/yarp.h.erb
rename to bin/templates/node.h.erb
index e513204362..08be1e1c51 100644
--- a/bin/templates/yarp.h.erb
+++ b/bin/templates/node.h.erb
@@ -1,6 +1,9 @@
-/******************************************************************************/
-/* BEGIN TEMPLATE                                                             */
-/******************************************************************************/
+#ifndef YARP_NODE_H
+#define YARP_NODE_H
+
+#include "location.h"
+#include "token.h"
+#include <stddef.h>
 
 typedef enum {
 <%- nodes.each do |node| -%>
@@ -8,6 +11,14 @@ typedef enum {
 <%- end -%>
 } yp_node_type_t;
 
+// Forward-declare yp_node so the list below can point at nodes.
+struct yp_node;
+typedef struct yp_node_list {
+  struct yp_node **nodes;
+  size_t size;
+  size_t capacity;
+} yp_node_list_t;
+
 // This is the overall tagged union representing a node in the syntax tree.
 typedef struct yp_node {
   // This represents the type of the node. It somewhat maps to the nodes that
@@ -36,6 +47,4 @@ typedef struct yp_node {
   } as;
 } yp_node_t;
 
-/******************************************************************************/
-/* END TEMPLATE                                                               */
-/******************************************************************************/
+#endif // YARP_NODE_H
diff --git a/ext/yarp/location.h b/ext/yarp/location.h
new file mode 100644
index 0000000000..269b16218b
--- /dev/null
+++ b/ext/yarp/location.h
@@ -0,0 +1,13 @@
+#ifndef YARP_LOCATION_H
+#define YARP_LOCATION_H
+
+#include <stdint.h>
+
+// This represents a range of bytes in the source string to which a node or
+// token corresponds.
+typedef struct {
+  uint64_t start;
+  uint64_t end;
+} yp_location_t;
+
+#endif // YARP_LOCATION_H
diff --git a/ext/yarp/node.h b/ext/yarp/node.h
new file mode 100644
index 0000000000..f61f21c895
--- /dev/null
+++ b/ext/yarp/node.h
@@ -0,0 +1,169 @@
+#ifndef YARP_NODE_H
+#define YARP_NODE_H
+
+#include "location.h"
+#include "token.h"
+#include <stddef.h>
+
+typedef enum {
+  YP_NODE_ASSIGNMENT,
+  YP_NODE_BINARY,
+  YP_NODE_CHARACTER_LITERAL,
+  YP_NODE_FLOAT_LITERAL,
+  YP_NODE_IDENTIFIER,
+  YP_NODE_IF_MODIFIER,
+  YP_NODE_IMAGINARY_LITERAL,
+  YP_NODE_INTEGER_LITERAL,
+  YP_NODE_OPERATOR_ASSIGNMENT,
+  YP_NODE_PROGRAM,
+  YP_NODE_RATIONAL_LITERAL,
+  YP_NODE_REDO,
+  YP_NODE_RETRY,
+  YP_NODE_STATEMENTS,
+  YP_NODE_TERNARY,
+  YP_NODE_UNLESS_MODIFIER,
+  YP_NODE_UNTIL_MODIFIER,
+  YP_NODE_VARIABLE_REFERENCE,
+  YP_NODE_WHILE_MODIFIER,
+} yp_node_type_t;
+
+// Forward-declare yp_node so the list below can point at nodes.
+struct yp_node;
+typedef struct yp_node_list {
+  struct yp_node **nodes;
+  size_t size;
+  size_t capacity;
+} yp_node_list_t;
+
+// This is the overall tagged union representing a node in the syntax tree.
+typedef struct yp_node {
+  // This represents the type of the node. It somewhat maps to the nodes that
+  // existed in the original grammar and ripper, but it's not a 1:1 mapping.
+  yp_node_type_t type;
+
+  // This is the location of the node in the source. It's a range of bytes
+  // containing a start and an end.
+  yp_location_t location;
+
+  // Every entry in this union is a different kind of node in the tree. For
+  // the most part they only contain one or two child nodes, except for the
+  // more complicated nodes like params. There may be an opportunity for
+  // optimization here by combining node types that share the same shape, but
+  // it might not end up mattering in the final compiled code.
+  union {
+    // Assignment
+    struct {
+      struct yp_node *target;
+      yp_token_t operator;
+      struct yp_node *value;
+    } assignment;
+
+    // Binary
+    struct {
+      struct yp_node *left;
+      yp_token_t operator;
+      struct yp_node *right;
+    } binary;
+
+    // CharacterLiteral
+    struct {
+      yp_token_t value;
+    } character_literal;
+
+    // FloatLiteral
+    struct {
+      yp_token_t value;
+    } float_literal;
+
+    // Identifier
+    struct {
+      yp_token_t value;
+    } identifier;
+
+    // IfModifier
+    struct {
+      struct yp_node *statement;
+      yp_token_t keyword;
+      struct yp_node *predicate;
+    } if_modifier;
+
+    // ImaginaryLiteral
+    struct {
+      yp_token_t value;
+    } imaginary_literal;
+
+    // IntegerLiteral
+    struct {
+      yp_token_t value;
+    } integer_literal;
+
+    // OperatorAssignment
+    struct {
+      struct yp_node *target;
+      yp_token_t operator;
+      struct yp_node *value;
+    } operator_assignment;
+
+    // Program
+    struct {
+      struct yp_node *statements;
+    } program;
+
+    // RationalLiteral
+    struct {
+      yp_token_t value;
+    } rational_literal;
+
+    // Redo
+    struct {
+      yp_token_t value;
+    } redo;
+
+    // Retry
+    struct {
+      yp_token_t value;
+    } retry;
+
+    // Statements
+    struct {
+      struct yp_node_list *body;
+    } statements;
+
+    // Ternary
+    struct {
+      struct yp_node *predicate;
+      yp_token_t question_mark;
+      struct yp_node *true_expression;
+      yp_token_t colon;
+      struct yp_node *false_expression;
+    } ternary;
+
+    // UnlessModifier
+    struct {
+      struct yp_node *statement;
+      yp_token_t keyword;
+      struct yp_node *predicate;
+    } unless_modifier;
+
+    // UntilModifier
+    struct {
+      struct yp_node *statement;
+      yp_token_t keyword;
+      struct yp_node *predicate;
+    } until_modifier;
+
+    // VariableReference
+    struct {
+      yp_token_t value;
+    } variable_reference;
+
+    // WhileModifier
+    struct {
+      struct yp_node *statement;
+      yp_token_t keyword;
+      struct yp_node *predicate;
+    } while_modifier;
+  } as;
+} yp_node_t;
+
+#endif // YARP_NODE_H
diff --git a/ext/yarp/token.h b/ext/yarp/token.h
new file mode 100644
index 0000000000..644cd43f35
--- /dev/null
+++ b/ext/yarp/token.h
@@ -0,0 +1,14 @@
+#ifndef YARP_TOKEN_H
+#define YARP_TOKEN_H
+
+#include "token_type.h"
+
+// This struct represents a token in the Ruby source. We use it to track both
+// type and location information.
+typedef struct {
+  yp_token_type_t type;
+  const char *start;
+  const char *end;
+} yp_token_t;
+
+#endif // YARP_TOKEN_H
diff --git a/ext/yarp/yarp.h b/ext/yarp/yarp.h
index 1fc3db6c17..f8ff9233ee 100644
--- a/ext/yarp/yarp.h
+++ b/ext/yarp/yarp.h
@@ -1,7 +1,9 @@
 #ifndef YARP_H
 #define YARP_H
 
-#include "token_type.h"
+#include "location.h"
+#include "node.h"
+#include "token.h"
 #include
 #include
 #include
@@ -13,14 +15,6 @@
 #include
 #include
 
-// This struct represents a token in the Ruby source. We use it to track both
-// type and location information.
-typedef struct {
-  yp_token_type_t type;
-  const char *start;
-  const char *end;
-} yp_token_t;
-
 // When lexing Ruby source, the lexer has a small amount of state to tell which
 // kind of token it is currently lexing. For example, when we find the start of
 // a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
@@ -112,181 +106,6 @@ struct yp_parser {
   yp_error_handler_t *error_handler; // the error handler
 };
 
-// This represents a range of bytes in the source string to which a node or
-// token corresponds.
-typedef struct {
-  uint64_t start;
-  uint64_t end;
-} yp_location_t;
-
-struct yp_node;
-typedef struct yp_node_list {
-  struct yp_node **nodes;
-  size_t size;
-  size_t capacity;
-} yp_node_list_t;
-
-/******************************************************************************/
-/* BEGIN TEMPLATE                                                             */
-/******************************************************************************/
-
-typedef enum {
-  YP_NODE_ASSIGNMENT,
-  YP_NODE_BINARY,
-  YP_NODE_CHARACTER_LITERAL,
-  YP_NODE_FLOAT_LITERAL,
-  YP_NODE_IDENTIFIER,
-  YP_NODE_IF_MODIFIER,
-  YP_NODE_IMAGINARY_LITERAL,
-  YP_NODE_INTEGER_LITERAL,
-  YP_NODE_OPERATOR_ASSIGNMENT,
-  YP_NODE_PROGRAM,
-  YP_NODE_RATIONAL_LITERAL,
-  YP_NODE_REDO,
-  YP_NODE_RETRY,
-  YP_NODE_STATEMENTS,
-  YP_NODE_TERNARY,
-  YP_NODE_UNLESS_MODIFIER,
-  YP_NODE_UNTIL_MODIFIER,
-  YP_NODE_VARIABLE_REFERENCE,
-  YP_NODE_WHILE_MODIFIER,
-} yp_node_type_t;
-
-// This is the overall tagged union representing a node in the syntax tree.
-typedef struct yp_node {
-  // This represents the type of the node. It somewhat maps to the nodes that
-  // existed in the original grammar and ripper, but it's not a 1:1 mapping.
-  yp_node_type_t type;
-
-  // This is the location of the node in the source. It's a range of bytes
-  // containing a start and an end.
-  yp_location_t location;
-
-  // Every entry in this union is a different kind of node in the tree. For
-  // the most part they only contain one or two child nodes, except for the
-  // more complicated nodes like params. There may be an opportunity for
-  // optimization here by combining node types that share the same shape, but
-  // it might not end up mattering in the final compiled code.
-  union {
-    // Assignment
-    struct {
-      struct yp_node *target;
-      yp_token_t operator;
-      struct yp_node *value;
-    } assignment;
-
-    // Binary
-    struct {
-      struct yp_node *left;
-      yp_token_t operator;
-      struct yp_node *right;
-    } binary;
-
-    // CharacterLiteral
-    struct {
-      yp_token_t value;
-    } character_literal;
-
-    // FloatLiteral
-    struct {
-      yp_token_t value;
-    } float_literal;
-
-    // Identifier
-    struct {
-      yp_token_t value;
-    } identifier;
-
-    // IfModifier
-    struct {
-      struct yp_node *statement;
-      yp_token_t keyword;
-      struct yp_node *predicate;
-    } if_modifier;
-
-    // ImaginaryLiteral
-    struct {
-      yp_token_t value;
-    } imaginary_literal;
-
-    // IntegerLiteral
-    struct {
-      yp_token_t value;
-    } integer_literal;
-
-    // OperatorAssignment
-    struct {
-      struct yp_node *target;
-      yp_token_t operator;
-      struct yp_node *value;
-    } operator_assignment;
-
-    // Program
-    struct {
-      struct yp_node *statements;
-    } program;
-
-    // RationalLiteral
-    struct {
-      yp_token_t value;
-    } rational_literal;
-
-    // Redo
-    struct {
-      yp_token_t value;
-    } redo;
-
-    // Retry
-    struct {
-      yp_token_t value;
-    } retry;
-
-    // Statements
-    struct {
-      struct yp_node_list *body;
-    } statements;
-
-    // Ternary
-    struct {
-      struct yp_node *predicate;
-      yp_token_t question_mark;
-      struct yp_node *true_expression;
-      yp_token_t colon;
-      struct yp_node *false_expression;
-    } ternary;
-
-    // UnlessModifier
-    struct {
-      struct yp_node *statement;
-      yp_token_t keyword;
-      struct yp_node *predicate;
-    } unless_modifier;
-
-    // UntilModifier
-    struct {
-      struct yp_node *statement;
-      yp_token_t keyword;
-      struct yp_node *predicate;
-    } until_modifier;
-
-    // VariableReference
-    struct {
-      yp_token_t value;
-    } variable_reference;
-
-    // WhileModifier
-    struct {
-      struct yp_node *statement;
-      yp_token_t keyword;
-      struct yp_node *predicate;
-    } while_modifier;
-  } as;
-} yp_node_t;
-
-/******************************************************************************/
-/* END TEMPLATE                                                               */
-/******************************************************************************/
-
 // Initialize a parser with the given start and end pointers.
 void yp_parser_init(yp_parser_t *parser, const char *source, off_t size);
 