|
1 | 1 | const std = @import("std"); |
2 | | -const tokenize = @import("parse/tokenize.zig"); |
3 | | -const Region = @import("../base/Region.zig"); |
4 | 2 |
|
| 3 | +const tokenize = @import("parse/tokenize.zig"); |
| 4 | +const TokenIndex = tokenize.TokenIndex; |
| 5 | +const TokenizedBuffer = tokenize.TokenizedBuffer; |
5 | 6 | pub const IR = @import("parse/IR.zig"); |
6 | | - |
7 | | -pub const Node = struct { |
8 | | - tag: Tag, |
9 | | - data: Data, |
10 | | - region: Region, |
11 | | - |
12 | | - pub const Tag = enum { |
13 | | - Unary, |
14 | | - Binary, |
15 | | - // TODO |
16 | | - }; |
17 | | - |
18 | | - pub const Data = union { |
19 | | - Unary: UnaryOpData, |
20 | | - Binary: BinaryOpData, |
21 | | - // Add more node data as needed |
22 | | - }; |
23 | | - |
24 | | - pub const UnaryOpData = struct { |
25 | | - // TODO |
26 | | - }; |
27 | | - |
28 | | - pub const BinaryOpData = struct { |
29 | | - // TODO |
| 7 | +const NodeList = IR.NodeList; |
| 8 | +const Diagnostic = IR.Diagnostic; |
| 9 | +const GenCatData = @import("GenCatData"); |
| 10 | +const Parser = @import("parse/Parser.zig"); |
| 11 | +const exitOnOom = @import("../collections/utils.zig").exitOnOom; |
| 12 | + |
| 13 | +source: []const u8, |
| 14 | +tokens: TokenizedBuffer, |
| 15 | +store: IR.NodeStore, |
| 16 | +errors: []const Diagnostic, |
| 17 | + |
| 18 | +/// Parses a single Roc file. The returned AST should be deallocated by calling deinit |
| 19 | +/// after its data is used to create the next IR, or at the end of any test. |
| 20 | +pub fn parse(allocator: std.mem.Allocator, source: []const u8) IR { |
| 21 | + var messages: [128]tokenize.Diagnostic = undefined; |
| 22 | + const msg_slice = messages[0..]; |
| 23 | + var gc = GenCatData.init(allocator) catch exitOnOom(); |
| 24 | + defer gc.deinit(); |
| 25 | + var tokenizer = tokenize.Tokenizer.init(source, msg_slice, &gc, allocator); |
| 26 | + tokenizer.tokenize(); |
| 27 | + const result = tokenizer.finish_and_deinit(); |
| 28 | + |
| 29 | + if (result.messages.len > 0) { |
| 30 | + std.debug.print("Found these issues while parsing:\n{any}", .{result.messages}); |
| 31 | + } |
| 32 | + |
| 33 | + var parser = Parser.init(allocator, result.tokens); |
| 34 | + defer parser.deinit(); |
| 35 | + |
| 36 | + parser.parseFile(); |
| 37 | + |
| 38 | + const errors = parser.diagnostics.toOwnedSlice() catch exitOnOom(); |
| 39 | + |
| 40 | + return .{ |
| 41 | + .source = source, |
| 42 | + .tokens = result.tokens, |
| 43 | + .store = parser.store, |
| 44 | + .errors = errors, |
30 | 45 | }; |
31 | | -}; |
32 | | - |
33 | | -pub const Diagnostic = struct { |
34 | | - tag: Tag, |
35 | | - region: Region, |
36 | | - |
37 | | - pub const Tag = enum { |
38 | | - // TODO |
39 | | - }; |
40 | | -}; |
41 | | - |
42 | | -pub const Parser = struct { |
43 | | - pos: usize, |
44 | | - tokens: tokenize.TokenizedBuffer, |
45 | | - nodes: std.MultiArrayList(Node), |
46 | | - diagnostics: std.ArrayList(tokenize.Diagnostic), |
47 | | - allocator: std.mem.Allocator, |
48 | | - |
49 | | - pub fn init(tokens: tokenize.TokenizedBuffer, allocator: std.mem.Allocator) Parser { |
50 | | - return Parser{ |
51 | | - .pos = 0, |
52 | | - .tokens = tokens, |
53 | | - .nodes = std.MultiArrayList(Node){}, |
54 | | - .diagnostics = std.ArrayList(tokenize.Diagnostic).init(allocator), |
55 | | - .allocator = allocator, |
56 | | - }; |
57 | | - } |
58 | | - |
59 | | - pub fn advance(self: *Parser) void { |
60 | | - if (self.pos >= self.tokens.tokens.len) { |
61 | | - return; |
62 | | - } |
63 | | - std.debug.print("advance {s}\n", .{@tagName(self.tokens.tokens.items(.tag)[self.pos])}); |
64 | | - self.pos += 1; |
65 | | - } |
66 | | - |
67 | | - pub fn peek(self: *Parser) tokenize.Token.Tag { |
68 | | - if (self.pos >= self.tokens.tokens.len) { |
69 | | - return .EndOfFile; |
70 | | - } |
71 | | - return self.tokens.tokens.items(.tag)[self.pos]; |
72 | | - } |
73 | | - |
74 | | - // If the next token is a newline, consume it |
75 | | - // Returns the indent level of the next line if it is a newline, otherwise null |
76 | | - pub fn consumeNewline(self: *Parser) ?u16 { |
77 | | - if (self.peek() != .Newline) { |
78 | | - return null; |
79 | | - } |
80 | | - const indent = self.tokens.tokens.items(.offset)[self.pos]; |
81 | | - self.advance(); |
82 | | - return @intCast(indent); |
83 | | - } |
84 | | - |
85 | | - // Returns the indent level of the next line if the next token is a newline, otherwise null |
86 | | - pub fn peekNewline(self: *Parser) ?u16 { |
87 | | - if (self.peek() != .Newline) { |
88 | | - return null; |
89 | | - } |
90 | | - const indent = self.tokens.tokens.items(.offset)[self.pos]; |
91 | | - return @intCast(indent); |
92 | | - } |
93 | | - |
94 | | - pub fn parseFile(self: *Parser) !void { |
95 | | - while (self.peek() != .EndOfFile) { |
96 | | - if (self.consumeNewline()) |indent| { |
97 | | - std.debug.print("parseFile indent {d}\n", .{indent}); |
98 | | - std.debug.assert(indent == 0); // TODO: report an error |
99 | | - } |
100 | | - if (self.peek() == .EndOfFile) { |
101 | | - break; |
102 | | - } |
103 | | - self.parseStmt(0); |
104 | | - } |
105 | | - } |
106 | | - |
107 | | - pub fn parseStmt(self: *Parser, base_indent: u16) void { |
108 | | - switch (self.peek()) { |
109 | | - .LowerIdent => { |
110 | | - self.advance(); |
111 | | - if (self.peek() == .OpEquals) { |
112 | | - self.finishParseAssign(base_indent); |
113 | | - std.debug.print("parseStmt assign\n", .{}); |
114 | | - } else { |
115 | | - std.debug.print("parseStmt expr\n", .{}); |
116 | | - } |
117 | | - }, |
118 | | - else => { |
119 | | - std.debug.panic("todo: emit error, unexpected token {s}", .{@tagName(self.peek())}); |
120 | | - }, |
121 | | - } |
122 | | - } |
123 | | - |
124 | | - pub fn parseExpr(self: *Parser) void { |
125 | | - switch (self.peek()) { |
126 | | - .LowerIdent => { |
127 | | - self.advance(); |
128 | | - std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())}); |
129 | | - // TODO: add node |
130 | | - }, |
131 | | - .Int => { |
132 | | - self.advance(); |
133 | | - std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())}); |
134 | | - // TODO: add node |
135 | | - }, |
136 | | - else => { |
137 | | - std.debug.panic("todo: emit error", .{}); |
138 | | - }, |
139 | | - } |
140 | | - } |
141 | | - |
142 | | - pub fn finishParseAssign(self: *Parser, base_indent: u16) void { |
143 | | - std.debug.assert(self.peek() == .OpEquals); |
144 | | - self.advance(); |
145 | | - if (self.consumeNewline()) |indent| { |
146 | | - std.debug.print("startParseAssign indent {d}\n", .{indent}); |
147 | | - if (indent <= base_indent) { |
148 | | - std.debug.panic("todo: emit error", .{}); |
149 | | - } |
150 | | - |
151 | | - self.parseStmt(indent); |
152 | | - |
153 | | - while (true) { |
154 | | - if (self.peekNewline()) |i| { |
155 | | - if (i <= base_indent) { |
156 | | - break; |
157 | | - } |
158 | | - self.advance(); |
159 | | - } else { |
160 | | - break; |
161 | | - } |
162 | | - self.parseStmt(indent); |
163 | | - } |
164 | | - } else { |
165 | | - self.parseExpr(); |
166 | | - } |
167 | | - |
168 | | - std.debug.print("finishParseAssign\n", .{}); |
169 | | - } |
170 | | -}; |
171 | | -test "Parser advance and peek" { |
172 | | - const allocator = std.heap.page_allocator; |
173 | | - var tokens = try tokenize.TokenizedBuffer.init(allocator); |
174 | | - // x = |
175 | | - // y = 1 |
176 | | - // y |
177 | | - try tokens.pushToken(.LowerIdent, 0, 1); |
178 | | - try tokens.pushToken(.OpEquals, 0, 0); |
179 | | - try tokens.pushNewline(4); |
180 | | - try tokens.pushToken(.LowerIdent, 0, 0); |
181 | | - try tokens.pushToken(.OpEquals, 0, 0); |
182 | | - try tokens.pushToken(.Int, 0, 0); |
183 | | - try tokens.pushNewline(4); |
184 | | - try tokens.pushToken(.LowerIdent, 0, 0); |
185 | | - try tokens.pushNewline(0); |
186 | | - try tokens.pushToken(.EndOfFile, 0, 0); |
187 | | - |
188 | | - var parser = Parser.init(tokens, allocator); |
189 | | - |
190 | | - try parser.parseFile(); |
191 | | - |
192 | | - // std.debug.assert(parser.nodes) |
193 | 46 | } |
0 commit comments