Skip to content

Commit e26f8ea

Browse files
committed
Add helpers for path in workspace
- Preparatory work for more changes regarding autoloading of configs. Assisted by Claude Code.
1 parent 49b90ff commit e26f8ea

File tree

6 files changed

+644
-0
lines changed

6 files changed

+644
-0
lines changed

lib/dunolint_engine/src/dunolint_engine.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,4 +445,6 @@ let run ~running_mode f =
445445
446446
module Private = struct
447447
let mkdirs = mkdirs
448+
449+
module Path_in_workspace = Path_in_workspace
448450
end

lib/dunolint_engine/src/dunolint_engine.mli

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,10 @@ val materialize : t -> unit
116116

117117
module Private : sig
118118
val mkdirs : Relative_path.t -> unit
119+
120+
(** Path operations for workspace-relative paths with escaping prevention.
121+
122+
This module is exported for testing purposes. See
123+
{!Path_in_workspace} for documentation. *)
124+
module Path_in_workspace = Path_in_workspace
119125
end
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
(*********************************************************************************)
2+
(* Dunolint - A tool to lint and help manage files in dune projects *)
3+
(* Copyright (C) 2024-2025 Mathieu Barbin <mathieu.barbin@gmail.com> *)
4+
(* *)
5+
(* This file is part of Dunolint. *)
6+
(* *)
7+
(* Dunolint is free software; you can redistribute it and/or modify it *)
8+
(* under the terms of the GNU Lesser General Public License as published by *)
9+
(* the Free Software Foundation either version 3 of the License, or any later *)
10+
(* version, with the LGPL-3.0 Linking Exception. *)
11+
(* *)
12+
(* Dunolint is distributed in the hope that it will be useful, but WITHOUT *)
13+
(* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *)
14+
(* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *)
15+
(* and the file `NOTICE.md` at the root of this repository for more details. *)
16+
(* *)
17+
(* You should have received a copy of the GNU Lesser General Public License *)
18+
(* and the LGPL-3.0 Linking Exception along with this library. If not, see *)
19+
(* <http://www.gnu.org/licenses/> and <https://spdx.org>, respectively. *)
20+
(*********************************************************************************)
21+
22+
type t = Relative_path.t
23+
24+
let is_parent_segment s = String.equal ".." s
25+
let has_parent_segments segs = List.exists ~f:is_parent_segment segs
26+
27+
let check_escape_path_exn (t : t) =
28+
if has_parent_segments (Fpath.segs (t :> Fpath.t))
29+
then
30+
invalid_arg
31+
(Printf.sprintf
32+
"'%s': relative path escapes upward past starting point"
33+
(Relative_path.to_string t))
34+
;;
35+
36+
let chop_prefix t ~prefix =
37+
if Relative_path.equal prefix Relative_path.empty
38+
then Some t
39+
else (
40+
match Relative_path.chop_prefix t ~prefix with
41+
| None -> None
42+
| Some t as some ->
43+
check_escape_path_exn t;
44+
some)
45+
;;
46+
47+
let parent t =
48+
if Relative_path.equal t Relative_path.empty
49+
then None
50+
else (
51+
match Relative_path.parent t with
52+
| None ->
53+
(* This is the problematic case from upstream, as the function never
54+
returns [None]. Pending upgrades and TBD. *)
55+
None
56+
[@coverage off]
57+
| Some t as some ->
58+
check_escape_path_exn t;
59+
some)
60+
;;
61+
62+
let ancestors_autoloading_dirs ~path =
63+
if Relative_path.equal path Relative_path.empty
64+
then []
65+
else (
66+
check_escape_path_exn path;
67+
let segs = Fpath.segs (Relative_path.rem_empty_seg path :> Fpath.t) in
68+
List.init (List.length segs) ~f:(fun i ->
69+
List.take segs i
70+
|> List.map ~f:Fsegment.v
71+
|> Relative_path.of_list
72+
|> Relative_path.to_dir_path))
73+
;;
74+
75+
let paths_to_check_for_skip_predicates ~path =
76+
if Relative_path.equal path Relative_path.empty
77+
then []
78+
else (
79+
check_escape_path_exn path;
80+
let segs = Fpath.segs (path :> Fpath.t) in
81+
List.init
82+
(List.length segs - 1)
83+
~f:(fun i ->
84+
List.take segs (i + 1)
85+
|> List.map ~f:Fsegment.v
86+
|> Relative_path.of_list
87+
|> Relative_path.to_dir_path))
88+
;;
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
(*_********************************************************************************)
2+
(*_ Dunolint - A tool to lint and help manage files in dune projects *)
3+
(*_ Copyright (C) 2024-2025 Mathieu Barbin <mathieu.barbin@gmail.com> *)
4+
(*_ *)
5+
(*_ This file is part of Dunolint. *)
6+
(*_ *)
7+
(*_ Dunolint is free software; you can redistribute it and/or modify it *)
8+
(*_ under the terms of the GNU Lesser General Public License as published by *)
9+
(*_ the Free Software Foundation either version 3 of the License, or any later *)
10+
(*_ version, with the LGPL-3.0 Linking Exception. *)
11+
(*_ *)
12+
(*_ Dunolint is distributed in the hope that it will be useful, but WITHOUT *)
13+
(*_ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *)
14+
(*_ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *)
15+
(*_ and the file `NOTICE.md` at the root of this repository for more details. *)
16+
(*_ *)
17+
(*_ You should have received a copy of the GNU Lesser General Public License *)
18+
(*_ and the LGPL-3.0 Linking Exception along with this library. If not, see *)
19+
(*_ <http://www.gnu.org/licenses/> and <https://spdx.org>, respectively. *)
20+
(*_********************************************************************************)
21+
22+
(** Paths relative to the workspace root, with escaping path prevention.
23+
24+
This module wraps [Relative_path.t] to provide path operations specific
25+
to dunolint's workspace traversal, with additional safety guarantees for
26+
escaping paths.
27+
28+
{1 Purpose}
29+
30+
When traversing a dune workspace, dunolint needs to work with paths relative
31+
to the workspace root. This module provides:
32+
33+
- Safe path operations that prevent escaping the workspace root
34+
- Validation that paths don't contain upward-escaping [".."] segments
35+
- Workspace-aware wrappers around [Relative_path] operations
36+
37+
{1 Escaping Paths}
38+
39+
An {b escaping path} is a relative path that, after [Fpath] normalization,
40+
contains leading [".."] segments. These segments indicate the path escapes
41+
upward past its starting point.
42+
43+
{b Examples of escaping paths} (all rejected by this module):
44+
- [".."] - escapes upward by one level
45+
- ["../config"] - escapes upward then descends
46+
- ["a/../.."] - normalizes to [".."], which escapes upward
47+
- ["../../../etc/passwd"] - escapes multiple levels upward
48+
49+
{b Why reject escaping paths?}
50+
51+
+ {b Workspace boundary enforcement}: Paths in a dune workspace should
52+
reference locations within that workspace. Escaping paths reference
53+
locations outside the workspace root, which violates this invariant.
54+
+ {b Memory safety}: In previous versions, operations like [parent] could
55+
create unbounded escaping paths when called repeatedly on the empty path,
56+
leading to memory growth bugs. By rejecting escaping paths at construction
57+
time, these bugs are prevented.
58+
+ {b Semantic clarity}: Escaping paths have ambiguous meaning without
59+
additional context about where the "starting point" is. By requiring
60+
all workspace paths to be non-escaping, the semantics are clear: they're
61+
paths relative to the workspace root.
62+
63+
{1 Relationship to fpath-base}
64+
65+
This module anticipates upcoming changes to the [Relative_path] module in
66+
the fpath-base library (see fpath-base v0.4.0+). The upstream library will:
67+
68+
- Reject escaping paths in [Relative_path.of_fpath], [Relative_path.of_string],
69+
etc.
70+
- Make [Relative_path.parent] return [None] for the empty path instead of
71+
creating ["../"]
72+
- Add runtime checks in operations like [Relative_path.extend] to prevent
73+
creating escaping paths
74+
75+
For the time being, we implement our own wrapper that provides these
76+
guarantees, using [check_escape_path_exn] to validate paths. When the
77+
upstream changes are released, this module can be simplified to rely on
78+
the upstream guarantees.
79+
80+
See [fpath-base/doc/docs/explanation/path-normalization.md] for detailed
81+
documentation of the upstream approach.
82+
83+
{1 Migration Note}
84+
85+
Once fpath-base v0.4.0+ is released with escaping path rejection built-in,
86+
the explicit [check_escape_path_exn] calls in this module can be removed,
87+
as the upstream [Relative_path] module will guarantee that no escaping
88+
paths can be constructed. *)
89+
90+
type t = Relative_path.t
91+
92+
(** [check_escape_path_exn t] validates that path [t] does not escape upward.
93+
94+
Raises [Invalid_argument] if [t] contains leading [".."] segments after
95+
normalization (i.e., if it is an escaping path).
96+
97+
This function is used internally to validate results of path operations.
98+
It will become unnecessary once fpath-base v0.4.0+ guarantees that
99+
[Relative_path.t] values cannot be escaping paths.
100+
101+
{b Examples:}
102+
103+
These would raise [Invalid_argument]:
104+
{[
105+
check_escape_path_exn (Relative_path.v "..");
106+
(* escapes upward *)
107+
check_escape_path_exn (Relative_path.v "a/../..")
108+
(* normalizes to ".." *)
109+
]}
110+
111+
These are OK:
112+
{[
113+
check_escape_path_exn (Relative_path.v "a/b");
114+
(* descends only *)
115+
check_escape_path_exn (Relative_path.v "a/../b")
116+
(* normalizes to "b" *)
117+
]} *)
118+
val check_escape_path_exn : Relative_path.t -> unit
119+
120+
(** [chop_prefix t ~prefix] removes the prefix [prefix] from path [t].
121+
122+
Returns:
123+
- [Some result] where [result] is [t] with [prefix] removed from the start
124+
- [Some t] (unchanged) when [prefix] is [empty] - removing nothing returns
125+
the original path
126+
- [None] if [prefix] is not actually a prefix of [t]
127+
128+
Note: This operation works on path segments, not string prefixes.
129+
For example, ["foo/bar-baz"] does not have prefix ["foo/bar"]. *)
130+
val chop_prefix : t -> prefix:Relative_path.t -> t option
131+
132+
(** [parent t] returns the parent directory of path [t], or [None] if [t] has
133+
no parent.
134+
135+
Returns [None] when:
136+
- [t] is equal to [empty] (the path ["./"])
137+
138+
Raises [Invalid_argument] if [t] is an escaping path (contains leading [".."]
139+
after normalization). This should not occur for paths constructed through
140+
this module's API, as escaping paths are rejected during construction.
141+
142+
{b Note}: This behavior prevents infinite loops that occurred in
143+
previous versions where [parent empty] would return ["../"], creating
144+
paths that escape unboundedly.
145+
146+
If you need to navigate upward through parent directories (including
147+
above the starting point), use [Absolute_path.parent] or work with
148+
[Fpath.t] directly. *)
149+
val parent : t -> t option
150+
151+
(** [ancestors_autoloading_dirs ~path] returns all ancestor directories of [path],
152+
from the workspace root down to the parent of [path].
153+
154+
This function is specifically designed for config autoloading: it returns
155+
the list of directories that should be checked for dunolint configuration
156+
files when linting a file at [path].
157+
158+
The returned list is ordered from root to deepest ancestor (i.e., from
159+
shortest to longest paths), which matches the order in which configs should
160+
be loaded and accumulated.
161+
162+
Returns [[]] when:
163+
- [path] is equal to [empty] (the path ["./"])
164+
165+
Raises [Invalid_argument] if [path] is an escaping path (contains leading
166+
[".."] after normalization).
167+
168+
{b Examples:}
169+
170+
Linting file ["a/b/c.ml"] should check configs in:
171+
{[
172+
ancestors_autoloading_dirs ~path:(v "a/b/c.ml")
173+
]}
174+
Returns: [["./"; "a/"; "a/b/"]]
175+
176+
Linting file ["file.ml"] at workspace root checks root config:
177+
{[
178+
ancestors_autoloading_dirs ~path:(v "file.ml")
179+
]}
180+
Returns: [["./"]].
181+
182+
Empty path has no ancestors:
183+
{[
184+
ancestors_autoloading_dirs ~path:empty
185+
]}
186+
Returns: [[]]
187+
188+
This function is used internally by the engine when linting individual files
189+
to discover which configuration files should be loaded from ancestor
190+
directories. *)
191+
val ancestors_autoloading_dirs : path:t -> t list
192+
193+
(** [paths_to_check_for_skip_predicates ~path] returns paths to check against
194+
skip predicates during tree traversal.
195+
196+
This function has different semantics from {!ancestors_autoloading_dirs}:
197+
- For files: returns parent directories only
198+
- For directories (trailing ["/"]): returns ancestors {b including the directory itself}
199+
- Never includes workspace root ["./"]
200+
201+
The returned list is ordered from root to deepest.
202+
203+
Returns [[]] when:
204+
- [path] is equal to [empty] (the path ["./"])
205+
- [path] is a file in the workspace root
206+
207+
Raises [Invalid_argument] if [path] is an escaping path (contains leading
208+
[".."] after normalization).
209+
210+
{b Examples:}
211+
212+
File paths return parent directories only:
213+
{[
214+
paths_to_check_for_skip_predicates ~path:(v "foo/bar/bin")
215+
(* Returns: ["foo/"; "foo/bar/"] *)
216+
]}
217+
218+
Directory paths (trailing ["/"]) include the directory itself:
219+
{[
220+
paths_to_check_for_skip_predicates ~path:(v "foo/bar/bin/")
221+
(* Returns: ["foo/"; "foo/bar/"; "foo/bar/bin/"] *)
222+
]}
223+
224+
Root and single files return empty:
225+
{[
226+
paths_to_check_for_skip_predicates ~path:(v "file.ml");
227+
(* Returns: [] *)
228+
paths_to_check_for_skip_predicates ~path:empty
229+
(* Returns: [] *)
230+
]}
231+
232+
This function is used when checking if paths match skip predicates in
233+
already-loaded configs. The directory-includes-self behavior is important:
234+
when visiting a directory during traversal, you want to check if that
235+
directory itself should be skipped. *)
236+
val paths_to_check_for_skip_predicates : path:t -> t list

0 commit comments

Comments
 (0)