
Commit 7f55056

gamebox authored and joshuawarner32 committed
First draft of Zig version of Parser
Note: this commit is ~95% the work of Anthony Bullard <anthony.bullard@gmail.com>; I (Josh Warner) just rebased, fixed some bugs, and signed it.
1 parent f82fe06 commit 7f55056

File tree

11 files changed: +2277 −382 lines

build.zig

Lines changed: 14 additions & 14 deletions
@@ -105,20 +105,20 @@ pub fn build(b: *std.Build) void {
             .{ .name = "cli", .module = b.createModule(.{ .root_source_file = b.path("src/cli.zig") }) },
         },
     );
-    const tokenize_module = b.createModule(.{ .root_source_file = b.path("src/check/parse/tokenize.zig") });
-    tokenize_module.addImport("GenCatData", zg.module("GenCatData"));
-    add_fuzz_target(
-        b,
-        build_afl,
-        check_step,
-        target,
-        "tokenize",
-        b.path("src/fuzz/tokenize.zig"),
-        &[_]Import{
-            .{ .name = "GenCatData", .module = zg.module("GenCatData") },
-            .{ .name = "tokenize", .module = tokenize_module },
-        },
-    );
+    // const tokenize_module = b.createModule(.{ .root_source_file = b.path("src/check/parse/tokenize.zig") });
+    // tokenize_module.addImport("GenCatData", zg.module("GenCatData"));
+    // add_fuzz_target(
+    //     b,
+    //     build_afl,
+    //     check_step,
+    //     target,
+    //     "tokenize",
+    //     b.path("src/fuzz/tokenize.zig"),
+    //     &[_]Import{
+    //         .{ .name = "GenCatData", .module = zg.module("GenCatData") },
+    //         .{ .name = "tokenize", .module = tokenize_module },
+    //     },
+    // );
     }
 }

src/base/Module.zig

Lines changed: 7 additions & 7 deletions
@@ -14,10 +14,10 @@ const Problem = problem.Problem;
 const Module = @This();
 
 /// The full name of a module, e.g. `Foo.Bar`.
-name: []u8,
+name: []const u8,
 /// The shorthand for the package this module is imported from
 /// if it is not from the current package, e.g. `json` in `json.Json`.
-package_shorthand: ?[]u8,
+package_shorthand: ?[]const u8,
 /// Whether the module is a builtin module.
 is_builtin: bool,
 /// The list of all idents exposed by this module.
@@ -72,7 +72,7 @@ pub const Store = struct {
     pub fn lookup(
         self: *Store,
         name: []const u8,
-        package_shorthand: ?[]u8,
+        package_shorthand: ?[]const u8,
     ) ?Idx {
         const items = self.modules.items;
         for (0..self.modules.len()) |index| {
@@ -98,8 +98,8 @@ pub const Store = struct {
     /// reusing an existing [Idx] if the module was already imported.
     pub fn getOrInsert(
         self: *Store,
-        name: []u8,
-        package_shorthand: ?[]u8,
+        name: []const u8,
+        package_shorthand: ?[]const u8,
     ) LookupResult {
         if (self.lookup(name, package_shorthand)) |idx| {
             return LookupResult{ .module_idx = idx, .was_present = true };
@@ -115,11 +115,11 @@ pub const Store = struct {
         }
     }
 
-    pub fn getName(self: *Store, idx: Idx) []u8 {
+    pub fn getName(self: *Store, idx: Idx) []const u8 {
         return self.modules.items.items(.name)[@as(usize, @intFromEnum(idx))];
     }
 
-    pub fn getPackageShorthand(self: *Store, idx: Idx) ?[]u8 {
+    pub fn getPackageShorthand(self: *Store, idx: Idx) ?[]const u8 {
         return self.modules.items.items(.package_shorthand)[@as(usize, @intFromEnum(idx))];
     }

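Why the []u8 to []const u8 change matters: a Zig string literal has type *const [N:0]u8, which coerces to []const u8 but never to []u8, so the old signatures could not accept literals such as the "todo_shorthand" passed to getOrInsert in the canonicalize diff below. A minimal standalone sketch (illustration only, not code from this commit):

const std = @import("std");

// Accepts read-only bytes, so a string literal coerces fine.
fn nameLen(name: []const u8) usize {
    return name.len;
}

// fn nameLenMut(name: []u8) usize { return name.len; }
// nameLenMut("Foo.Bar") would be a compile error:
// expected type '[]u8', found '*const [7:0]u8'

pub fn main() void {
    std.debug.print("{d}\n", .{nameLen("Foo.Bar")}); // prints 7
}
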
src/check/canonicalize.zig

Lines changed: 16 additions & 16 deletions
@@ -28,7 +28,7 @@ pub const IR = @import("./canonicalize/IR.zig");
 /// The canonicalization occurs on a single module (file) in isolation. This allows for this work to be easily parallelized and also cached. So where the source code for a module has not changed, the CanIR can simply be loaded from disk and used immediately.
 pub fn canonicalize(
     can_ir: IR,
-    parse_ir: parse.IR,
+    parse_ir: *parse.IR,
     allocator: std.mem.Allocator,
 ) void {
     var env = can_ir.env;
@@ -37,29 +37,29 @@ pub fn canonicalize(
     const scope = Scope.init(&env, &builtin_aliases, &imported_idents, allocator);
     _ = scope;
 
-    for (parse_ir.defs.items.items) |stmt| {
+    const file = parse_ir.store.getFile(parse.IR.NodeStore.FileIdx{ .id = 0 });
+
+    for (file.statements) |stmt_id| {
+        const stmt = parse_ir.store.getStatement(stmt_id);
         switch (stmt) {
-            .Import => |import| {
-                const res = env.modules.getOrInsert(
-                    import.name,
-                    import.package_shorthand,
-                );
+            .import => |import| {
+                const name = parse_ir.resolve(import.module_name_tok);
+                const name_region = parse_ir.tokens.resolve(import.module_name_tok);
+                const res = env.modules.getOrInsert(name, "todo_shorthand");
 
                 if (res.was_present) {
                     _ = env.problems.append(Problem.Canonicalize.make(.{ .DuplicateImport = .{
-                        .duplicate_import_region = import.name_region,
+                        .duplicate_import_region = name_region,
                     } }));
                 }
 
-                for (import.exposing.items.items) |exposed| {
-                    const exposed_ident = switch (exposed) {
-                        .Value => |ident| ident,
-                        .Type => |ident| ident,
-                        .CustomTagUnion => |custom| custom.name,
-                    };
-                    env.addExposedIdentForModule(exposed_ident, res.module_idx);
-                }
+                // TODO: need to intern the strings; not sure how that works currently?
+                // for (import.exposes) |exposed| {
+                //     const value_name = parse_ir.resolve(exposed);
+                //     env.addExposedIdentForModule(value_name, res.module_idx);
+                // }
             },
+            else => std.debug.panic("Unhandled statement type: {}", .{stmt}),
        }
     }

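The import handling above leans on Store.getOrInsert returning a was_present flag, so the same module is stored only once and a second import can be reported as a DuplicateImport problem. A simplified, hypothetical sketch of that dedup pattern (the real Store in src/base/Module.zig also tracks package shorthands and exposed idents):

const std = @import("std");

// Simplified stand-in for Module.Store: getOrInsert reports whether the
// module was already present, which is what canonicalize checks above.
const Store = struct {
    names: std.ArrayList([]const u8),

    const LookupResult = struct { module_idx: usize, was_present: bool };

    fn getOrInsert(self: *Store, name: []const u8) LookupResult {
        // Linear scan over existing modules, as in the real lookup().
        for (self.names.items, 0..) |existing, i| {
            if (std.mem.eql(u8, existing, name)) {
                return .{ .module_idx = i, .was_present = true };
            }
        }
        self.names.append(name) catch @panic("OOM");
        return .{ .module_idx = self.names.items.len - 1, .was_present = false };
    }
};

test "duplicate imports are detected on the second insert" {
    var store = Store{ .names = std.ArrayList([]const u8).init(std.testing.allocator) };
    defer store.names.deinit();

    try std.testing.expect(!store.getOrInsert("json.Json").was_present);
    try std.testing.expect(store.getOrInsert("json.Json").was_present);
}
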
src/check/parse.zig

Lines changed: 41 additions & 188 deletions
@@ -1,193 +1,46 @@
 const std = @import("std");
-const tokenize = @import("parse/tokenize.zig");
-const Region = @import("../base/Region.zig");
 
+const tokenize = @import("parse/tokenize.zig");
+const TokenIndex = tokenize.TokenIndex;
+const TokenizedBuffer = tokenize.TokenizedBuffer;
 pub const IR = @import("parse/IR.zig");
-
-pub const Node = struct {
-    tag: Tag,
-    data: Data,
-    region: Region,
-
-    pub const Tag = enum {
-        Unary,
-        Binary,
-        // TODO
-    };
-
-    pub const Data = union {
-        Unary: UnaryOpData,
-        Binary: BinaryOpData,
-        // Add more node data as needed
-    };
-
-    pub const UnaryOpData = struct {
-        // TODO
-    };
-
-    pub const BinaryOpData = struct {
-        // TODO
+const NodeList = IR.NodeList;
+const Diagnostic = IR.Diagnostic;
+const GenCatData = @import("GenCatData");
+const Parser = @import("parse/Parser.zig");
+const exitOnOom = @import("../collections/utils.zig").exitOnOom;
+
+source: []const u8,
+tokens: TokenizedBuffer,
+store: IR.NodeStore,
+errors: []const Diagnostic,
+
+/// Parses a single Roc file. The returned AST should be deallocated by calling deinit
+/// after its data is used to create the next IR, or at the end of any test.
+pub fn parse(allocator: std.mem.Allocator, source: []const u8) IR {
+    var messages: [128]tokenize.Diagnostic = undefined;
+    const msg_slice = messages[0..];
+    var gc = GenCatData.init(allocator) catch exitOnOom();
+    defer gc.deinit();
+    var tokenizer = tokenize.Tokenizer.init(source, msg_slice, &gc, allocator);
+    tokenizer.tokenize();
+    const result = tokenizer.finish_and_deinit();
+
+    if (result.messages.len > 0) {
+        std.debug.print("Found these issues while parsing:\n{any}", .{result.messages});
+    }
+
+    var parser = Parser.init(allocator, result.tokens);
+    defer parser.deinit();
+
+    parser.parseFile();
+
+    const errors = parser.diagnostics.toOwnedSlice() catch exitOnOom();
+
+    return .{
+        .source = source,
+        .tokens = result.tokens,
+        .store = parser.store,
+        .errors = errors,
     };
-};
-
-pub const Diagnostic = struct {
-    tag: Tag,
-    region: Region,
-
-    pub const Tag = enum {
-        // TODO
-    };
-};
-
-pub const Parser = struct {
-    pos: usize,
-    tokens: tokenize.TokenizedBuffer,
-    nodes: std.MultiArrayList(Node),
-    diagnostics: std.ArrayList(tokenize.Diagnostic),
-    allocator: std.mem.Allocator,
-
-    pub fn init(tokens: tokenize.TokenizedBuffer, allocator: std.mem.Allocator) Parser {
-        return Parser{
-            .pos = 0,
-            .tokens = tokens,
-            .nodes = std.MultiArrayList(Node){},
-            .diagnostics = std.ArrayList(tokenize.Diagnostic).init(allocator),
-            .allocator = allocator,
-        };
-    }
-
-    pub fn advance(self: *Parser) void {
-        if (self.pos >= self.tokens.tokens.len) {
-            return;
-        }
-        std.debug.print("advance {s}\n", .{@tagName(self.tokens.tokens.items(.tag)[self.pos])});
-        self.pos += 1;
-    }
-
-    pub fn peek(self: *Parser) tokenize.Token.Tag {
-        if (self.pos >= self.tokens.tokens.len) {
-            return .EndOfFile;
-        }
-        return self.tokens.tokens.items(.tag)[self.pos];
-    }
-
-    // If the next token is a newline, consume it
-    // Returns the indent level of the next line if it is a newline, otherwise null
-    pub fn consumeNewline(self: *Parser) ?u16 {
-        if (self.peek() != .Newline) {
-            return null;
-        }
-        const indent = self.tokens.tokens.items(.offset)[self.pos];
-        self.advance();
-        return @intCast(indent);
-    }
-
-    // Returns the indent level of the next line if the next token is a newline, otherwise null
-    pub fn peekNewline(self: *Parser) ?u16 {
-        if (self.peek() != .Newline) {
-            return null;
-        }
-        const indent = self.tokens.tokens.items(.offset)[self.pos];
-        return @intCast(indent);
-    }
-
-    pub fn parseFile(self: *Parser) !void {
-        while (self.peek() != .EndOfFile) {
-            if (self.consumeNewline()) |indent| {
-                std.debug.print("parseFile indent {d}\n", .{indent});
-                std.debug.assert(indent == 0); // TODO: report an error
-            }
-            if (self.peek() == .EndOfFile) {
-                break;
-            }
-            self.parseStmt(0);
-        }
-    }
-
-    pub fn parseStmt(self: *Parser, base_indent: u16) void {
-        switch (self.peek()) {
-            .LowerIdent => {
-                self.advance();
-                if (self.peek() == .OpEquals) {
-                    self.finishParseAssign(base_indent);
-                    std.debug.print("parseStmt assign\n", .{});
-                } else {
-                    std.debug.print("parseStmt expr\n", .{});
-                }
-            },
-            else => {
-                std.debug.panic("todo: emit error, unexpected token {s}", .{@tagName(self.peek())});
-            },
-        }
-    }
-
-    pub fn parseExpr(self: *Parser) void {
-        switch (self.peek()) {
-            .LowerIdent => {
-                self.advance();
-                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
-                // TODO: add node
-            },
-            .Int => {
-                self.advance();
-                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
-                // TODO: add node
-            },
-            else => {
-                std.debug.panic("todo: emit error", .{});
-            },
-        }
-    }
-
-    pub fn finishParseAssign(self: *Parser, base_indent: u16) void {
-        std.debug.assert(self.peek() == .OpEquals);
-        self.advance();
-        if (self.consumeNewline()) |indent| {
-            std.debug.print("startParseAssign indent {d}\n", .{indent});
-            if (indent <= base_indent) {
-                std.debug.panic("todo: emit error", .{});
-            }
-
-            self.parseStmt(indent);
-
-            while (true) {
-                if (self.peekNewline()) |i| {
-                    if (i <= base_indent) {
-                        break;
-                    }
-                    self.advance();
-                } else {
-                    break;
-                }
-                self.parseStmt(indent);
-            }
-        } else {
-            self.parseExpr();
-        }
-
-        std.debug.print("finishParseAssign\n", .{});
-    }
-};
-test "Parser advance and peek" {
-    const allocator = std.heap.page_allocator;
-    var tokens = try tokenize.TokenizedBuffer.init(allocator);
-    // x =
-    // y = 1
-    // y
-    try tokens.pushToken(.LowerIdent, 0, 1);
-    try tokens.pushToken(.OpEquals, 0, 0);
-    try tokens.pushNewline(4);
-    try tokens.pushToken(.LowerIdent, 0, 0);
-    try tokens.pushToken(.OpEquals, 0, 0);
-    try tokens.pushToken(.Int, 0, 0);
-    try tokens.pushNewline(4);
-    try tokens.pushToken(.LowerIdent, 0, 0);
-    try tokens.pushNewline(0);
-    try tokens.pushToken(.EndOfFile, 0, 0);
-
-    var parser = Parser.init(tokens, allocator);
-
-    try parser.parseFile();
-
-    // std.debug.assert(parser.nodes)
 }

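Putting the new parse.zig surface together, a caller would look roughly like this. The import path and the downstream hand-off are assumptions; only parse() and the IR fields (source, tokens, store, errors) appear in this commit:

const std = @import("std");
const parse = @import("check/parse.zig"); // hypothetical import path

// Sketch of a call site: tokenize + parse, then inspect diagnostics.
pub fn checkSource(allocator: std.mem.Allocator, source: []const u8) void {
    const ir = parse.parse(allocator, source);

    // Tokenizer and parser problems surface as IR.Diagnostic values.
    for (ir.errors) |diagnostic| {
        std.debug.print("parse problem: {any}\n", .{diagnostic});
    }

    // ir.store and ir.tokens would then feed canonicalize(...).
}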