
Commit 7f55056

gamebox authored and joshuawarner32 committed
First draft of Zig version of Parser
Note: this commit is ~95% the work of Anthony Bullard <anthony.bullard@gmail.com>; I (Josh Warner) just rebased, fixed some bugs, and signed it.
1 parent f82fe06 commit 7f55056

File tree

11 files changed: +2277 −382 lines

build.zig

Lines changed: 14 additions & 14 deletions
@@ -105,20 +105,20 @@ pub fn build(b: *std.Build) void {
             .{ .name = "cli", .module = b.createModule(.{ .root_source_file = b.path("src/cli.zig") }) },
         },
     );
-    const tokenize_module = b.createModule(.{ .root_source_file = b.path("src/check/parse/tokenize.zig") });
-    tokenize_module.addImport("GenCatData", zg.module("GenCatData"));
-    add_fuzz_target(
-        b,
-        build_afl,
-        check_step,
-        target,
-        "tokenize",
-        b.path("src/fuzz/tokenize.zig"),
-        &[_]Import{
-            .{ .name = "GenCatData", .module = zg.module("GenCatData") },
-            .{ .name = "tokenize", .module = tokenize_module },
-        },
-    );
+    // const tokenize_module = b.createModule(.{ .root_source_file = b.path("src/check/parse/tokenize.zig") });
+    // tokenize_module.addImport("GenCatData", zg.module("GenCatData"));
+    // add_fuzz_target(
+    //     b,
+    //     build_afl,
+    //     check_step,
+    //     target,
+    //     "tokenize",
+    //     b.path("src/fuzz/tokenize.zig"),
+    //     &[_]Import{
+    //         .{ .name = "GenCatData", .module = zg.module("GenCatData") },
+    //         .{ .name = "tokenize", .module = tokenize_module },
+    //     },
+    // );
     }
 }

src/base/Module.zig

Lines changed: 7 additions & 7 deletions
@@ -14,10 +14,10 @@ const Problem = problem.Problem;
 const Module = @This();
 
 /// The full name of a module, e.g. `Foo.Bar`.
-name: []u8,
+name: []const u8,
 /// The shorthand for the package this module is imported from
 /// if it is not from the current package, e.g. `json` in `json.Json`.
-package_shorthand: ?[]u8,
+package_shorthand: ?[]const u8,
 /// Whether the module is a builtin module.
 is_builtin: bool,
 /// The list of all idents exposed by this module.
@@ -72,7 +72,7 @@ pub const Store = struct {
     pub fn lookup(
         self: *Store,
         name: []const u8,
-        package_shorthand: ?[]u8,
+        package_shorthand: ?[]const u8,
     ) ?Idx {
         const items = self.modules.items;
         for (0..self.modules.len()) |index| {
@@ -98,8 +98,8 @@ pub const Store = struct {
     /// reusing an existing [Idx] if the module was already imported.
     pub fn getOrInsert(
         self: *Store,
-        name: []u8,
-        package_shorthand: ?[]u8,
+        name: []const u8,
+        package_shorthand: ?[]const u8,
     ) LookupResult {
         if (self.lookup(name, package_shorthand)) |idx| {
             return LookupResult{ .module_idx = idx, .was_present = true };
@@ -115,11 +115,11 @@ pub const Store = struct {
         }
     }
 
-    pub fn getName(self: *Store, idx: Idx) []u8 {
+    pub fn getName(self: *Store, idx: Idx) []const u8 {
         return self.modules.items.items(.name)[@as(usize, @intFromEnum(idx))];
     }
 
-    pub fn getPackageShorthand(self: *Store, idx: Idx) ?[]u8 {
+    pub fn getPackageShorthand(self: *Store, idx: Idx) ?[]const u8 {
         return self.modules.items.items(.package_shorthand)[@as(usize, @intFromEnum(idx))];
     }

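Why the []u8 to []const u8 change matters: a Zig string literal has type *const [N:0]u8, which coerces to []const u8 but never to []u8, so the old signatures could not accept literals such as the "todo_shorthand" passed to getOrInsert in the canonicalize diff below. A minimal standalone sketch (illustration only, not code from this commit):

const std = @import("std");

// Accepts read-only bytes, so a string literal coerces fine.
fn nameLen(name: []const u8) usize {
    return name.len;
}

// fn nameLenMut(name: []u8) usize { return name.len; }
// nameLenMut("Foo.Bar") would be a compile error:
// expected type '[]u8', found '*const [7:0]u8'

pub fn main() void {
    std.debug.print("{d}\n", .{nameLen("Foo.Bar")}); // prints 7
}
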
src/check/canonicalize.zig

Lines changed: 16 additions & 16 deletions
@@ -28,7 +28,7 @@ pub const IR = @import("./canonicalize/IR.zig");
 /// The canonicalization occurs on a single module (file) in isolation. This allows for this work to be easily parallelized and also cached. So where the source code for a module has not changed, the CanIR can simply be loaded from disk and used immediately.
 pub fn canonicalize(
     can_ir: IR,
-    parse_ir: parse.IR,
+    parse_ir: *parse.IR,
     allocator: std.mem.Allocator,
 ) void {
     var env = can_ir.env;
@@ -37,29 +37,29 @@ pub fn canonicalize(
     const scope = Scope.init(&env, &builtin_aliases, &imported_idents, allocator);
     _ = scope;
 
-    for (parse_ir.defs.items.items) |stmt| {
+    const file = parse_ir.store.getFile(parse.IR.NodeStore.FileIdx{ .id = 0 });
+
+    for (file.statements) |stmt_id| {
+        const stmt = parse_ir.store.getStatement(stmt_id);
         switch (stmt) {
-            .Import => |import| {
-                const res = env.modules.getOrInsert(
-                    import.name,
-                    import.package_shorthand,
-                );
+            .import => |import| {
+                const name = parse_ir.resolve(import.module_name_tok);
+                const name_region = parse_ir.tokens.resolve(import.module_name_tok);
+                const res = env.modules.getOrInsert(name, "todo_shorthand");
 
                 if (res.was_present) {
                     _ = env.problems.append(Problem.Canonicalize.make(.{ .DuplicateImport = .{
-                        .duplicate_import_region = import.name_region,
+                        .duplicate_import_region = name_region,
                     } }));
                 }
 
-                for (import.exposing.items.items) |exposed| {
-                    const exposed_ident = switch (exposed) {
-                        .Value => |ident| ident,
-                        .Type => |ident| ident,
-                        .CustomTagUnion => |custom| custom.name,
-                    };
-                    env.addExposedIdentForModule(exposed_ident, res.module_idx);
-                }
+                // TODO: need to intern the strings; not sure how that works currently?
+                // for (import.exposes) |exposed| {
+                //     const value_name = parse_ir.resolve(exposed);
+                //     env.addExposedIdentForModule(value_name, res.module_idx);
+                // }
             },
+            else => std.debug.panic("Unhandled statement type: {}", .{stmt}),
        }
     }

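The import handling above leans on Store.getOrInsert returning a was_present flag, so the same module is stored only once and a second import can be reported as a DuplicateImport problem. A simplified, hypothetical sketch of that dedup pattern (the real Store in src/base/Module.zig also tracks package shorthands and exposed idents):

const std = @import("std");

// Simplified stand-in for Module.Store: getOrInsert reports whether the
// module was already present, which is what canonicalize checks above.
const Store = struct {
    names: std.ArrayList([]const u8),

    const LookupResult = struct { module_idx: usize, was_present: bool };

    fn getOrInsert(self: *Store, name: []const u8) LookupResult {
        // Linear scan over existing modules, as in the real lookup().
        for (self.names.items, 0..) |existing, i| {
            if (std.mem.eql(u8, existing, name)) {
                return .{ .module_idx = i, .was_present = true };
            }
        }
        self.names.append(name) catch @panic("OOM");
        return .{ .module_idx = self.names.items.len - 1, .was_present = false };
    }
};

test "duplicate imports are detected on the second insert" {
    var store = Store{ .names = std.ArrayList([]const u8).init(std.testing.allocator) };
    defer store.names.deinit();

    try std.testing.expect(!store.getOrInsert("json.Json").was_present);
    try std.testing.expect(store.getOrInsert("json.Json").was_present);
}
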
src/check/parse.zig

Lines changed: 41 additions & 188 deletions
@@ -1,193 +1,46 @@
 const std = @import("std");
-const tokenize = @import("parse/tokenize.zig");
-const Region = @import("../base/Region.zig");
 
+const tokenize = @import("parse/tokenize.zig");
+const TokenIndex = tokenize.TokenIndex;
+const TokenizedBuffer = tokenize.TokenizedBuffer;
 pub const IR = @import("parse/IR.zig");
-
-pub const Node = struct {
-    tag: Tag,
-    data: Data,
-    region: Region,
-
-    pub const Tag = enum {
-        Unary,
-        Binary,
-        // TODO
-    };
-
-    pub const Data = union {
-        Unary: UnaryOpData,
-        Binary: BinaryOpData,
-        // Add more node data as needed
-    };
-
-    pub const UnaryOpData = struct {
-        // TODO
-    };
-
-    pub const BinaryOpData = struct {
-        // TODO
+const NodeList = IR.NodeList;
+const Diagnostic = IR.Diagnostic;
+const GenCatData = @import("GenCatData");
+const Parser = @import("parse/Parser.zig");
+const exitOnOom = @import("../collections/utils.zig").exitOnOom;
+
+source: []const u8,
+tokens: TokenizedBuffer,
+store: IR.NodeStore,
+errors: []const Diagnostic,
+
+/// Parses a single Roc file. The returned AST should be deallocated by calling deinit
+/// after its data is used to create the next IR, or at the end of any test.
+pub fn parse(allocator: std.mem.Allocator, source: []const u8) IR {
+    var messages: [128]tokenize.Diagnostic = undefined;
+    const msg_slice = messages[0..];
+    var gc = GenCatData.init(allocator) catch exitOnOom();
+    defer gc.deinit();
+    var tokenizer = tokenize.Tokenizer.init(source, msg_slice, &gc, allocator);
+    tokenizer.tokenize();
+    const result = tokenizer.finish_and_deinit();
+
+    if (result.messages.len > 0) {
+        std.debug.print("Found these issues while parsing:\n{any}", .{result.messages});
+    }
+
+    var parser = Parser.init(allocator, result.tokens);
+    defer parser.deinit();
+
+    parser.parseFile();
+
+    const errors = parser.diagnostics.toOwnedSlice() catch exitOnOom();
+
+    return .{
+        .source = source,
+        .tokens = result.tokens,
+        .store = parser.store,
+        .errors = errors,
     };
-};
-
-pub const Diagnostic = struct {
-    tag: Tag,
-    region: Region,
-
-    pub const Tag = enum {
-        // TODO
-    };
-};
-
-pub const Parser = struct {
-    pos: usize,
-    tokens: tokenize.TokenizedBuffer,
-    nodes: std.MultiArrayList(Node),
-    diagnostics: std.ArrayList(tokenize.Diagnostic),
-    allocator: std.mem.Allocator,
-
-    pub fn init(tokens: tokenize.TokenizedBuffer, allocator: std.mem.Allocator) Parser {
-        return Parser{
-            .pos = 0,
-            .tokens = tokens,
-            .nodes = std.MultiArrayList(Node){},
-            .diagnostics = std.ArrayList(tokenize.Diagnostic).init(allocator),
-            .allocator = allocator,
-        };
-    }
-
-    pub fn advance(self: *Parser) void {
-        if (self.pos >= self.tokens.tokens.len) {
-            return;
-        }
-        std.debug.print("advance {s}\n", .{@tagName(self.tokens.tokens.items(.tag)[self.pos])});
-        self.pos += 1;
-    }
-
-    pub fn peek(self: *Parser) tokenize.Token.Tag {
-        if (self.pos >= self.tokens.tokens.len) {
-            return .EndOfFile;
-        }
-        return self.tokens.tokens.items(.tag)[self.pos];
-    }
-
-    // If the next token is a newline, consume it
-    // Returns the indent level of the next line if it is a newline, otherwise null
-    pub fn consumeNewline(self: *Parser) ?u16 {
-        if (self.peek() != .Newline) {
-            return null;
-        }
-        const indent = self.tokens.tokens.items(.offset)[self.pos];
-        self.advance();
-        return @intCast(indent);
-    }
-
-    // Returns the indent level of the next line if the next token is a newline, otherwise null
-    pub fn peekNewline(self: *Parser) ?u16 {
-        if (self.peek() != .Newline) {
-            return null;
-        }
-        const indent = self.tokens.tokens.items(.offset)[self.pos];
-        return @intCast(indent);
-    }
-
-    pub fn parseFile(self: *Parser) !void {
-        while (self.peek() != .EndOfFile) {
-            if (self.consumeNewline()) |indent| {
-                std.debug.print("parseFile indent {d}\n", .{indent});
-                std.debug.assert(indent == 0); // TODO: report an error
-            }
-            if (self.peek() == .EndOfFile) {
-                break;
-            }
-            self.parseStmt(0);
-        }
-    }
-
-    pub fn parseStmt(self: *Parser, base_indent: u16) void {
-        switch (self.peek()) {
-            .LowerIdent => {
-                self.advance();
-                if (self.peek() == .OpEquals) {
-                    self.finishParseAssign(base_indent);
-                    std.debug.print("parseStmt assign\n", .{});
-                } else {
-                    std.debug.print("parseStmt expr\n", .{});
-                }
-            },
-            else => {
-                std.debug.panic("todo: emit error, unexpected token {s}", .{@tagName(self.peek())});
-            },
-        }
-    }
-
-    pub fn parseExpr(self: *Parser) void {
-        switch (self.peek()) {
-            .LowerIdent => {
-                self.advance();
-                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
-                // TODO: add node
-            },
-            .Int => {
-                self.advance();
-                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
-                // TODO: add node
-            },
-            else => {
-                std.debug.panic("todo: emit error", .{});
-            },
-        }
-    }
-
-    pub fn finishParseAssign(self: *Parser, base_indent: u16) void {
-        std.debug.assert(self.peek() == .OpEquals);
-        self.advance();
-        if (self.consumeNewline()) |indent| {
-            std.debug.print("startParseAssign indent {d}\n", .{indent});
-            if (indent <= base_indent) {
-                std.debug.panic("todo: emit error", .{});
-            }
-
-            self.parseStmt(indent);
-
-            while (true) {
-                if (self.peekNewline()) |i| {
-                    if (i <= base_indent) {
-                        break;
-                    }
-                    self.advance();
-                } else {
-                    break;
-                }
-                self.parseStmt(indent);
-            }
-        } else {
-            self.parseExpr();
-        }
-
-        std.debug.print("finishParseAssign\n", .{});
-    }
-};
-test "Parser advance and peek" {
-    const allocator = std.heap.page_allocator;
-    var tokens = try tokenize.TokenizedBuffer.init(allocator);
-    // x =
-    // y = 1
-    // y
-    try tokens.pushToken(.LowerIdent, 0, 1);
-    try tokens.pushToken(.OpEquals, 0, 0);
-    try tokens.pushNewline(4);
-    try tokens.pushToken(.LowerIdent, 0, 0);
-    try tokens.pushToken(.OpEquals, 0, 0);
-    try tokens.pushToken(.Int, 0, 0);
-    try tokens.pushNewline(4);
-    try tokens.pushToken(.LowerIdent, 0, 0);
-    try tokens.pushNewline(0);
-    try tokens.pushToken(.EndOfFile, 0, 0);
-
-    var parser = Parser.init(tokens, allocator);
-
-    try parser.parseFile();
-
-    // std.debug.assert(parser.nodes)
 }

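Putting the new parse.zig surface together, a caller would look roughly like this. The import path and the downstream hand-off are assumptions; only parse() and the IR fields (source, tokens, store, errors) appear in this commit:

const std = @import("std");
const parse = @import("check/parse.zig"); // hypothetical import path

// Sketch of a call site: tokenize + parse, then inspect diagnostics.
pub fn checkSource(allocator: std.mem.Allocator, source: []const u8) void {
    const ir = parse.parse(allocator, source);

    // Tokenizer and parser problems surface as IR.Diagnostic values.
    for (ir.errors) |diagnostic| {
        std.debug.print("parse problem: {any}\n", .{diagnostic});
    }

    // ir.store and ir.tokens would then feed canonicalize(...).
}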