Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 52 additions & 5 deletions src/browser/dump.zig
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ pub const Opts = struct {
// set to include element shadowroots in the dump
page: ?*const Page = null,

exclude_scripts: bool = false,
strip_mode: StripMode = .{},

// Selects which groups of elements are left out of the dump. Every flag
// defaults to false, so by default nothing is stripped.
pub const StripMode = struct {
// omit JS related elements (checked by isJsRelated), e.g. <script>
js: bool = false,
// omit UI related elements (checked by isUIRelated): img, picture, video,
// svg and everything that is CSS related
ui: bool = false,
// omit CSS related elements (checked by isCssRelated): <style> and
// stylesheet <link>
css: bool = false,
};
};

// writer must be a std.io.Writer
Expand Down Expand Up @@ -67,7 +73,7 @@ pub fn writeNode(node: *parser.Node, opts: Opts, writer: *std.Io.Writer) anyerro
.element => {
// open the tag
const tag_type = try parser.nodeHTMLGetTagType(node) orelse .undef;
if (opts.exclude_scripts and try isScriptOrRelated(tag_type, node)) {
if (try isStripped(tag_type, node, opts.strip_mode)) {
return;
}

Expand Down Expand Up @@ -159,9 +165,22 @@ pub fn writeChildren(root: *parser.Node, opts: Opts, writer: *std.Io.Writer) !vo
}
}

// When `exclude_scripts` is passed to dump, we don't include <script> tags.
// We also want to omit <link rel=preload as=script>
fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
// Returns true when the element must be left out of the dump because it
// belongs to at least one of the groups enabled in `strip_mode`.
//
// Groups are tested in the order js, css, ui. A group's check only runs when
// its flag is set, and evaluation stops at the first match. Errors from the
// individual checks are propagated to the caller.
fn isStripped(tag_type: parser.Tag, node: *parser.Node, strip_mode: Opts.StripMode) !bool {
    return (strip_mode.js and try isJsRelated(tag_type, node)) or
        (strip_mode.css and try isCssRelated(tag_type, node)) or
        (strip_mode.ui and try isUIRelated(tag_type, node));
}

fn isJsRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
if (tag_type == .script) {
return true;
}
Expand All @@ -178,6 +197,34 @@ fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
return false;
}

// Returns true when the element only exists to bring CSS into the document:
// a <style> element, or a <link> whose `rel` attribute contains the
// `stylesheet` keyword.
//
// `rel` holds a whitespace separated list of keywords, so values such as
// "alternate stylesheet" or "stylesheet preload" must match too. Each keyword
// is compared ASCII case-insensitively. A <link> without a `rel` attribute is
// not CSS related. Errors from reading the attribute are propagated.
fn isCssRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    if (tag_type == .style) {
        return true;
    }
    if (tag_type != .link) {
        return false;
    }

    const el = parser.nodeToElement(node);
    const rel = try parser.elementGetAttribute(el, "rel") orelse return false;

    // Look at every keyword instead of comparing the whole attribute value.
    var keywords = std.mem.tokenizeAny(u8, rel, &std.ascii.whitespace);
    while (keywords.next()) |keyword| {
        if (std.ascii.eqlIgnoreCase(keyword, "stylesheet")) {
            return true;
        }
    }
    return false;
}

// Returns true when the element is purely presentational: anything that is
// CSS related, the img, picture and video elements, and elements whose tag
// type is `.undef` but whose local name is exactly "svg".
//
// Errors from the CSS check or from reading the local name are propagated.
fn isUIRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    // CSS is part of the UI group, so it is checked before the tag type.
    if (try isCssRelated(tag_type, node)) return true;

    switch (tag_type) {
        .img, .picture, .video => return true,
        .undef => {
            // svg is recognised through its local name in this branch.
            const local_name = try parser.nodeLocalName(node);
            return std.mem.eql(u8, local_name, "svg");
        },
        else => return false,
    }
}

// area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
// https://html.spec.whatwg.org/#void-elements
fn isVoid(elem: *parser.Element) !bool {
Expand Down
4 changes: 2 additions & 2 deletions src/browser/page.zig
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ pub const Page = struct {
// set to include element shadowroots in the dump
page: ?*const Page = null,
with_base: bool = false,
exclude_scripts: bool = false,
strip_mode: Dump.Opts.StripMode = .{},
};

// dump writes the page content into the given file.
Expand Down Expand Up @@ -228,7 +228,7 @@ pub const Page = struct {

try Dump.writeHTML(doc, .{
.page = opts.page,
.exclude_scripts = opts.exclude_scripts,
.strip_mode = opts.strip_mode,
}, out);
}

Expand Down
54 changes: 48 additions & 6 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const log = @import("log.zig");
const App = @import("app.zig").App;
const Server = @import("server.zig").Server;
const Browser = @import("browser/browser.zig").Browser;
const DumpStripMode = @import("browser/dump.zig").Opts.StripMode;

const build_config = @import("build_config");

Expand Down Expand Up @@ -184,7 +185,7 @@ fn run(alloc: Allocator) !void {
try page.dump(.{
.page = page,
.with_base = opts.withbase,
.exclude_scripts = opts.noscript,
.strip_mode = opts.strip_mode,
}, &writer.interface);
try writer.interface.flush();
}
Expand Down Expand Up @@ -292,8 +293,8 @@ const Command = struct {
url: []const u8,
dump: bool = false,
common: Common,
noscript: bool = false,
withbase: bool = false,
strip_mode: DumpStripMode = .{},
};

const Common = struct {
Expand Down Expand Up @@ -372,7 +373,14 @@ const Command = struct {
\\Options:
\\--dump Dumps document to stdout.
\\ Defaults to false.
\\--noscript Exclude <script> tags in dump. Defaults to false.
\\
\\--strip_mode Comma separated list of tag groups to remove from dump
\\ the dump. e.g. --strip_mode js,css
\\ - "js" script and link[as=script, rel=preload]
\\ - "ui" includes img, picture, video, css and svg
\\ - "css" includes style and link[rel=stylesheet]
\\ - "full" includes js, ui and css
\\
\\--with_base Add a <base> tag in dump. Defaults to false.
\\
++ common_options ++
Expand Down Expand Up @@ -460,6 +468,10 @@ fn inferMode(opt: []const u8) ?App.RunMode {
return .fetch;
}

if (std.mem.eql(u8, opt, "--strip_mode")) {
return .fetch;
}

if (std.mem.eql(u8, opt, "--with_base")) {
return .fetch;
}
Expand Down Expand Up @@ -545,10 +557,10 @@ fn parseFetchArgs(
args: *std.process.ArgIterator,
) !Command.Fetch {
var dump: bool = false;
var noscript: bool = false;
var withbase: bool = false;
var url: ?[]const u8 = null;
var common: Command.Common = .{};
var strip_mode: DumpStripMode = .{};

while (args.next()) |opt| {
if (std.mem.eql(u8, "--dump", opt)) {
Expand All @@ -557,7 +569,11 @@ fn parseFetchArgs(
}

if (std.mem.eql(u8, "--noscript", opt)) {
noscript = true;
log.warn(.app, "deprecation warning", .{
.feature = "--noscript argument",
.hint = "use '--strip_mode js' instead",
});
strip_mode.js = true;
continue;
}

Expand All @@ -566,6 +582,32 @@ fn parseFetchArgs(
continue;
}

if (std.mem.eql(u8, "--strip_mode", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--strip_mode" });
return error.InvalidArgument;
};

var it = std.mem.splitScalar(u8, str, ',');
while (it.next()) |part| {
const trimmed = std.mem.trim(u8, part, &std.ascii.whitespace);
if (std.mem.eql(u8, trimmed, "js")) {
strip_mode.js = true;
} else if (std.mem.eql(u8, trimmed, "ui")) {
strip_mode.ui = true;
} else if (std.mem.eql(u8, trimmed, "css")) {
strip_mode.css = true;
} else if (std.mem.eql(u8, trimmed, "full")) {
strip_mode.js = true;
strip_mode.ui = true;
strip_mode.css = true;
} else {
log.fatal(.app, "invalid option choice", .{ .arg = "--strip_mode", .value = trimmed });
}
}
continue;
}

if (try parseCommonArg(allocator, opt, args, &common)) {
continue;
}
Expand All @@ -591,8 +633,8 @@ fn parseFetchArgs(
.url = url.?,
.dump = dump,
.common = common,
.noscript = noscript,
.withbase = withbase,
.strip_mode = strip_mode,
};
}

Expand Down