/
Finder.pm
980 lines (734 loc) · 27.1 KB
/
Finder.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
use v6;
unit class Path::Finder:ver<0.2.0>;
use IO::Glob;
# Ordered list of rule callbacks; !test runs an item through each of them in turn.
has Callable:D @!rules;
# Pruning results a rule may return instead of a plain Bool.
# PruneInclusive is the first (numerically 0, false) value, PruneExclusive the second (1, true).
our enum Prune is export(:prune) <PruneInclusive PruneExclusive>;
# Binds the :rules named argument of bless/new straight into the private attribute.
submethod BUILD(:@!rules) { }
# Private accessor handing out the rule list to code inside this class.
method !rules() {
    @!rules
}
# Categories of constraint methods. The functional interface (finder) sorts
# requested constraints by this enum's order before applying them.
our enum Precedence is export(:traits) <Skip Depth Name Stat Content And None Or Not>;
# Role mixed into every method declared with `is constraint(...)`;
# it records which Precedence category the method belongs to.
our role Constraint {
has Precedence:D $.precedence is required;
}
# Implements `is constraint(Precedence)` on methods: declares the method's
# return type as Path::Finder:D and tags it with the Constraint role.
multi sub trait_mod:<is>(Method $method, Precedence:D :$constraint!) is export(:traits) {
    trait_mod:<of>($method, Path::Finder:D);
    $method does Constraint($constraint);
    return $method;
}
# Normalises an argument of `and` into rule callbacks:
# a callable is already a rule ...
my multi rulify(Callable $callback) {
    $callback
}
# ... while a finder instance contributes all of its own rules.
my multi rulify(Path::Finder:D $finder) {
    $finder!rules
}
# Logical AND: returns a new finder whose rules are the invocant's rules
# followed by the rules derived from every argument (callables or finders).
proto method and(*@) is constraint(And) { * }
# Instance form: existing rules come first, then the additions.
multi method and(Path::Finder:D $self: *@also) {
    return self.bless: rules => flat(@!rules, @also.map(&rulify));
}
# Type-object form: there are no existing rules, so only the additions are used.
multi method and(Path::Finder:U: *@also) {
    return self.bless: rules => flat(@also.map(&rulify));
}
# Negative constraint.
proto method none(|) is constraint(None) { * }
# Type-object form: matches when none of the alternatives match,
# i.e. the negation of their logical OR.
multi method none(Path::Finder:U: *@no) {
    my $any-of = self.or(|@no);
    return $any-of.not;
}
# Instance form: adds the negation of a single callable rule to the invocant.
multi method none(Path::Finder:D: Callable $rule) {
    my &inverted = sub ($item, *%options) { negate($rule($item, |%options)) };
    return self.and(&inverted);
}
# Inverts a rule result. Anything that is not a Prune value is coerced to Bool first.
my multi negate(Bool() $value) {
    return $value ?? False !! True;
}
# Prune values flip to the other Prune value, so the pruning request itself survives negation.
my multi negate(Prune $value) {
    return $value ?? PruneInclusive !! PruneExclusive;
}
# Returns a new finder with exactly one rule: the negation of the
# invocant's complete rule chain.
method not() {
    my $original = self;
    my &inverse = sub ($item, *%opts) {
        negate($original!test($item, |%opts))
    };
    return self.bless(rules => [&inverse]);
}
# Normalises an argument into a finder object:
# a bare callable is wrapped in a fresh single-rule finder ...
my multi unrulify(Callable $callback) {
    Path::Finder.and($callback)
}
# ... and a finder is passed through untouched.
my multi unrulify(Path::Finder $finder) {
    $finder
}
# Logical OR over the given alternatives (callables or finders).
# Both candidates below require the type object as invocant.
proto method or(*@) is constraint(Or) { * }
# A single alternative needs no combining: just normalise it into a finder.
multi method or(Path::Finder:U: $rule) {
return unrulify($rule);
}
# Several alternatives: build one rule that runs each alternative's full chain.
multi method or(Path::Finder:U: *@also) {
# Normalise every alternative into a finder so it can be run through !test.
my @iterators = |@also.map(&unrulify);
my @rules = sub ($item, *%opts) {
# Stays False unless some alternative accepts the item or asks for pruning.
my $ret = False;
for @iterators -> $iterator {
given $iterator!test($item, |%opts) {
when PruneExclusive {
# The first alternative yielding PruneExclusive decides the outcome immediately.
return PruneExclusive;
}
when * === True {
# A plain match; overrides an earlier PruneInclusive.
$ret = $_;
}
when PruneInclusive {
# Only remembered while no alternative has matched yet.
$ret = $_ if $ret === False;
}
}
}
return $ret;
}
return self.bless(:@rules);
}
# Adds a rule that yields PruneInclusive as soon as any of the given
# alternatives matches the item or itself requests pruning; otherwise True.
method skip(*@garbage) is constraint(Skip) {
    my @iterators = |@garbage.map(&unrulify);
    return self.and: sub ($item, *%opts) {
        for @iterators -> $candidate {
            my $verdict = $candidate!test($item, |%opts);
            return PruneInclusive if $verdict || $verdict ~~ Prune;
        }
        return True;
    };
}
# Runs $item through every rule in order.
# The first false result (False or PruneInclusive) is returned immediately.
# Otherwise the result is True, or PruneExclusive if any rule returned that.
method !test(IO::Path $item, *%args) {
    my $verdict = True;
    for @!rules -> &rule {
        my $outcome = rule($item, |%args);
        if !$outcome {
            return $outcome;
        }
        if $outcome === PruneExclusive {
            $verdict = $outcome;
        }
    }
    return $verdict;
}
# Turns a name pattern into a matcher: non-strings are used as they are ...
my multi sub globulize(Any $matcher) {
    $matcher
}
# ... while strings are compiled into glob patterns via IO::Glob.
my multi sub globulize(Str $pattern) {
    glob($pattern)
}
# Matches the item's basename against $name (strings are treated as globs).
method name(Mu $name) is constraint(Name) {
    my $pattern = globulize($name);
    return self.and(sub ($entry, *%) { $entry.basename ~~ $pattern });
}
# Matches the item's extension against $ext (no glob conversion is applied).
method ext(Mu $ext) is constraint(Name) {
    return self.and(sub ($entry, *%) { $entry.extension ~~ $ext });
}
# Matches the stringified path against $path (strings are treated as globs).
method path(Mu $path) is constraint(Name) {
    my $pattern = globulize($path);
    return self.and(sub ($entry, *%) { ~$entry ~~ $pattern });
}
# Matches the path relative to its search base against $path (strings are treated as globs).
method relpath(Mu $path ) is constraint(Name) {
    my $pattern = globulize($path);
    return self.and(sub ($entry, :$base, *%) { $entry.relative($base) ~~ $pattern });
}
# Smartmatches the IO::Path object itself against $path.
method io(Mu $path) is constraint(Name) {
    return self.and(sub ($entry, *%) { $entry ~~ $path });
}
# Matches items whose "symlink that does not exist" status equals $value.
method dangling(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { ($entry.l && !$entry.e) === $value });
}
# Matches items whose readability (.r) equals $value.
method readable(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.r === $value });
}
# Matches items whose writability (.w) equals $value.
method writable(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.w === $value });
}
# Matches items whose executability (.x) equals $value.
method executable(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.x === $value });
}
# Matches items whose combined read/write test (.rw) equals $value.
method read-writable(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.rw === $value });
}
# Matches items whose combined read/write/execute test (.rwx) equals $value.
method read-write-executable(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.rwx === $value });
}
# Matches items whose existence test (.e) equals $value.
method exists(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.e === $value });
}
# Matches items whose regular-file test (.f) equals $value.
method file(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.f === $value });
}
# Matches items whose directory test (.d) equals $value.
method directory(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.d === $value });
}
# Matches items whose symlink test (.l) equals $value.
method symlink(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.l === $value });
}
# Matches items whose zero-size test (.z) equals $value.
method empty(Bool $value = True) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.z === $value });
}
# Builds a rule that reads one stat field of the item's absolute path
# through nqp::stat and smartmatches the value against $matcher.
my sub stat-check($field, $matcher) {
    my &checker = sub ($entry, *%) {
        use nqp;
        my $stat-value = nqp::stat(nqp::unbox_s($entry.absolute), $field);
        return $stat-value ~~ $matcher;
    };
    return &checker;
}
# Matches the item's inode number against $inode.
method inode(Mu $inode) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_PLATFORM_INODE, $inode);
    return self.and(&check);
}
# Matches the item's device number against $device.
method device(Mu $device) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_PLATFORM_DEV, $device);
    return self.and(&check);
}
# Matches the item's link count against $nlinks.
method nlinks(Mu $nlinks) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_PLATFORM_NLINKS, $nlinks);
    return self.and(&check);
}
# Matches the item's owning user id against $uid.
method uid(Mu $uid) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_UID, $uid);
    return self.and(&check);
}
# Matches the item's owning group id against $gid.
method gid(Mu $gid) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_GID, $gid);
    return self.and(&check);
}
# Matches items whose STAT_ISDEV stat field equals the numeric value of $value.
method special(Bool $value = True) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_ISDEV, +$value);
    return self.and(&check);
}
# Matches the item's allocated block count against $blocks.
method blocks(Mu $blocks) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_PLATFORM_BLOCKS, $blocks);
    return self.and(&check);
}
# Matches the item's block size against $blocksize.
method blocksize(Mu $blocksize) is constraint(Stat) {
    my &check = stat-check(nqp::const::STAT_PLATFORM_BLOCKSIZE, $blocksize);
    return self.and(&check);
}
# Matches the item's access time against $accessed.
method accessed(Mu $accessed) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.accessed ~~ $accessed });
}
# Matches the item's change time against $changed.
method changed(Mu $changed) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.changed ~~ $changed });
}
# Matches the item's modification time against $modified.
method modified(Mu $modified) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.modified ~~ $modified });
}
# Matches the item's permission mode against $mode.
method mode(Mu $mode) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.mode ~~ $mode });
}
# Matches the item's size (.s) against $size.
method size(Mu $size) is constraint(Stat) {
    return self.and(sub ($entry, *%) { $entry.s ~~ $size });
}
# Restricts results by their depth below the search base.
proto method depth($) is constraint(Depth) { * }
# Integer range: besides filtering, this prunes traversal once the upper bound is reached.
multi method depth(Range $depth-range where .is-int) {
    my ($min, $max) = $depth-range.int-bounds;
    return self.and: sub ($item, :$depth, *%) {
        # At the maximum depth: accept the item but do not descend further.
        return PruneExclusive if $depth ~~ $max;
        # Inside the range: plain match.
        return True if $depth ~~ $depth-range;
        # Too shallow: reject, but keep descending.
        return False if $depth < $min;
        # Anything else: reject and stop descending.
        return PruneInclusive;
    };
}
# A single integer is shorthand for the one-element range.
multi method depth(Int $depth) {
    my $exact = $depth..$depth;
    return self.depth($exact);
}
# Any other matcher is smartmatched against the depth, without pruning.
multi method depth(Mu $depth-match) {
    return self.and: sub ($item, :$depth, *%) {
        $depth ~~ $depth-match
    };
}
# Prunes (and excludes) every directory whose basename matches $pattern.
method skip-dir(Mu $pattern) is constraint(Skip) {
    return self.and: sub ($entry, *%) {
        return PruneInclusive if $entry.basename ~~ $pattern && $entry.d;
        return True;
    };
}
# Like skip-dir, but only applies below the starting directories (depth > 0).
method skip-subdir(Mu $pattern) is constraint(Skip) {
    return self.and: sub ($entry, :$depth, *%) {
        return PruneInclusive if $depth > 0 && $entry.basename ~~ $pattern && $entry.d;
        return True;
    };
}
# Prunes (and excludes) entries below the starting directories whose basename
# begins with a dot.
#
# $hide - when False no rule is added. A finder is still returned (a copy of
#         the invocant's rules) so that the call can be chained and used from
#         the functional interface; previously the empty result of the `if`
#         statement was returned instead of a finder.
method skip-hidden(Bool $hide = True) is constraint(Skip) {
    return self.and unless $hide;
    return self.and: sub ($item, :$depth, *%) {
        if $depth > 0 && $item.basename ~~ rx/ ^ '.' / {
            return PruneInclusive;
        }
        return True;
    };
}
# Directory names used by version control systems; '_svn' is added when the
# distro name is 'mswin32'.
my $vcs-dirs = any(<.git .bzr .hg _darcs CVS RCS .svn>, |($*DISTRO.name eq 'mswin32' ?? '_svn' !! ()));
# A name passes when it ends in neither '.#' nor ',v'.
my $vcs-files = none(rx/ '.#' $ /, rx/ ',v' $ /);
# Prunes version-control directories and excludes version-control files.
#
# $hide - when False no rule is added. A finder is still returned (a copy of
#         the invocant's rules) so that the call can be chained; previously
#         the empty result of the statement modifier was returned instead.
method skip-vcs(Bool $hide = True) is constraint(Skip) {
    return self.and unless $hide;
    return self.skip-dir($vcs-dirs).name($vcs-files);
}
# Exceptions whose message contains 'Failed to open file'; the content rules
# catch these and treat the file as unreadable instead of aborting the search.
subset FailedToOpen of Exception where .message.contains('Failed to open file');
# Tests the first line of regular files.
# %opts are forwarded when opening the file (e.g. :enc).
#
# The file is opened explicitly and closed in a LEAVE phaser. Reading only the
# first element of IO::Path.lines never exhausts the lazy sequence, which
# leaves the underlying handle open.
proto method shebang(Mu $pattern, *%opts) is constraint(Content) { * }
# Pattern form: the first line must smartmatch $pattern.
# Non-files and files that cannot be opened do not match.
multi method shebang(Mu $pattern, *%opts) {
    self.and: sub ($item, *%) {
        return False unless $item.f;
        my $handle = $item.open(|%opts);
        LEAVE { .close with $handle }
        return $handle.get ~~ $pattern;
        CATCH { when FailedToOpen { return False } }
    };
}
# Boolean form: whether the first line starts with '#!' must equal $value.
# Non-files yield !$value; files that cannot be opened yield False.
multi method shebang(Bool $value = True, *%opts) {
    self.and: sub ($item, *%) {
        return !$value unless $item.f;
        my $handle = $item.open(|%opts);
        LEAVE { .close with $handle }
        return ?($handle.get ~~ rx/ ^ '#!' /) === $value;
        CATCH { when FailedToOpen { return False } }
    };
}
# Matches regular files whose complete contents smartmatch $pattern.
# %opts are forwarded to slurp; non-files and unopenable files do not match.
method contents(Mu $pattern, *%opts) is constraint(Content) {
    return self.and: sub ($entry, *%) {
        return False unless $entry.f;
        my $text = $entry.slurp(|%opts);
        return $text ~~ $pattern;
        CATCH { when FailedToOpen { return False } }
    };
}
# Matches regular files that contain at least one line smartmatching $pattern.
# %opts are forwarded when opening the file (e.g. :enc).
# Non-files and files that cannot be opened do not match.
#
# The file is opened explicitly and closed in a LEAVE phaser. Returning on the
# first matching line stops iterating IO::Path.lines before it is exhausted,
# which leaves the underlying handle open.
method lines(Mu $pattern, *%opts) is constraint(Content) {
    self.and: sub ($item, *%) {
        return False unless $item.f;
        my $handle = $item.open(|%opts);
        LEAVE { .close with $handle }
        for $handle.lines -> $line {
            return True if $line ~~ $pattern;
        }
        return False;
        CATCH { when FailedToOpen { return False } }
    }
}
# Matches items that have no line smartmatching $pattern.
# %opts are forwarded when opening the file (e.g. :enc).
# Non-files and files that cannot be opened count as matching.
#
# The file is opened explicitly and closed in a LEAVE phaser. Returning on the
# first offending line stops iterating IO::Path.lines before it is exhausted,
# which leaves the underlying handle open.
method no-lines(Mu $pattern, *%opts) is constraint(Content) {
    self.and: sub ($item, *%) {
        return True unless $item.f;
        my $handle = $item.open(|%opts);
        LEAVE { .close with $handle }
        for $handle.lines -> $line {
            return False if $line ~~ $pattern;
        }
        return True;
        CATCH { when FailedToOpen { return True } }
    }
}
# Traversal orders accepted by the :order option of `in`.
enum Order is export(:DEFAULT :order) < BreadthFirst PreOrder PostOrder >;
# Result converters keyed by the type requested via :as. Str stringifies each
# path; IO::Path maps to the Block type object, which is false, so `in`
# returns the paths unconverted.
my %as{Any:U} = ((Str) => { ~$_ }, (IO::Path) => Block);
# Lazily walks @dirs (default '.') and yields every item accepted by the rules.
#
# Named options:
#   :follow-symlinks  descend into symlinked directories
#   :report-symlinks  include symlinks in the results (defaults to :follow-symlinks)
#   :order            BreadthFirst, PreOrder or PostOrder
#   :sorted           sort the entries of each directory before queueing them
#   :loop-safe        visit each directory (identified by inode and device) only once
#   :relative         yield paths relative to their search base
#   :keep-going       on X::IO::Dir, continue instead of rethrowing
#   :quiet            do not note the message of such errors
#   :as / :map        result type, or a custom mapping applied to every result
#
# The :loop-safe default compares the lower-cased distro name against
# lower-case names. The rest of this file compares $*DISTRO.name against
# 'mswin32', so the previous mixed-case 'MSWin32' entry could never match and
# inode-based loop detection was enabled on Windows.
multi method in(Path::Finder:D:
    *@dirs,
    Bool:D :$follow-symlinks = True,
    Bool:D :$report-symlinks = $follow-symlinks,
    Order:D :$order = BreadthFirst,
    Bool:D :$sorted = True,
    Bool:D :$loop-safe = $*DISTRO.name.lc ne any(<mswin32 os2 dos netware symbian>),
    Bool:D :$relative = False,
    Bool:D :$keep-going = True,
    Bool:D :$quiet = False,
    Any:U :$as = IO::Path,
    :&map = %as{$as},
    --> Seq:D
) {
    # Queue entries are (path, depth, search base, result); a Bool type object
    # as result means the path has not been tested yet.
    my @queue = (@dirs || '.').map(*.IO).map: { ($^path, 0, $^path, Bool) };
    my Bool %seen;
    # True the first time a given inode/device pair is seen, False afterwards.
    sub is-unique (IO::Path $item) {
        use nqp;
        my $inode = nqp::stat(nqp::unbox_s($item.absolute), nqp::const::STAT_PLATFORM_INODE);
        my $device = nqp::stat(nqp::unbox_s($item.absolute), nqp::const::STAT_PLATFORM_DEV);
        my $key = "$inode\-$device";
        return False if %seen{$key};
        return %seen{$key} = True;
    }
    # The symlink test is only needed when symlinks get special treatment.
    my Bool $check-symlinks = !$follow-symlinks || !$report-symlinks;
    my $seq := gather while @queue {
        my ($item, $depth, $base, $result) = @( @queue.shift );
        without $result {
            my $is-link = $check-symlinks ?? $item.l !! False;
            next if $is-link && !$report-symlinks;
            $result = self!test($item, :$depth, :$base);
            my $prune = $result ~~ Prune || $is-link && !$follow-symlinks;
            if !$prune && $item.d && (!$loop-safe || is-unique($item)) {
                my @next = $item.dir.self.map: { ($^child, $depth + 1, $base, Bool) };
                @next .= sort if $sorted;
                given $order {
                    when BreadthFirst {
                        @queue.append: @next;
                    }
                    when PostOrder {
                        # Re-queue the directory after its children, carrying the
                        # already computed result so it is not tested again.
                        @next.push: ($item, $depth, $base, $result);
                        @queue.prepend: @next;
                        next;
                    }
                    when PreOrder {
                        @queue.prepend: @next;
                    }
                }
                CATCH {
                    when X::IO::Dir {
                        .rethrow unless $keep-going;
                        .message.note unless $quiet;
                    }
                }
            }
        }
        take $relative ?? $item.relative($base).IO !! $item if $result;
    }
    return &map ?? $seq.map(&map) !! $seq;
}
# Type-object form: searches with a fresh, rule-less finder (matches everything).
multi method in(Path::Finder:U: |args --> Seq:D) {
    my $instance = self.new;
    return $instance.in(|args);
}
# Functional constructor: builds a finder from named options. Every option
# name must be a method of $base that carries the Constraint role; the option
# value supplies that method's argument(s).
our sub finder(Path::Finder :$base = Path::Finder, *%options --> Path::Finder) is export(:find) {
# One requested constraint: the method to call and the arguments to call it with.
class Entry {
has $.name;
has $.method handles <precedence>;
has $.capture;
method call-with($object) {
return $object.$!method(|$!capture);
}
};
my Entry @entries;
for %options.kv -> $name, $value {
my $method = $base.^lookup($name);
die "Finder key $name invalid" if not $method.defined or $method !~~ Constraint;
# A Capture value is used as-is; otherwise the argument list is derived from
# the method's positional count (the `- 1` presumably discounts the invocant).
my $capture = $value ~~ Capture ?? $value !! do given $method.signature.count - 1 {
when 1 {
\($value);
}
when Inf {
# Slurpy methods: each Hash or Pair element is itself turned into a finder.
\(|@($value).map: -> $entry { $entry ~~ Hash|Pair ?? finder(|%($entry)) !! $entry });
}
}
@entries.push: Entry.new(:$name, :$method, :$capture);
}
# Apply constraints in Precedence order, regardless of the order they were given in.
my @keys = @entries.sort(*.precedence);
return ($base, |@keys).reduce: -> $object, $entry {
$entry.call-with($object);
}
}
# Functional interface: splits %options into traversal options (passed to
# `in`) and rule options (passed to `finder`), then runs the search over @dirs.
#
# `report-symlinks` is a named parameter of `in`, so it belongs to the
# traversal options. It used to be left in %options, where `finder` rejected
# it as an invalid key.
our sub find(*@dirs, *%options --> Seq:D) is export(:DEFAULT :find) {
    my %in-options = %options<follow-symlinks report-symlinks order sorted loop-safe relative keep-going quiet as map>:delete:p;
    return finder(|%options).in(|@dirs, |%in-options);
}
=begin pod
=head1 SYNOPSIS
use Path::Finder;
my $finder = Path::Finder; # match anything
$finder = $finder.file.size(* > 10_000); # add/chain rules
# iterator interface
for $finder.in(@dirs) -> $file {
...
}
# functional interface
for find(@dirs, :file, :size(* > 10_000)) -> $file {
...
}
=head1 DESCRIPTION
This module iterates over files and directories to identify ones matching a
user-defined set of rules. The object-oriented API is based heavily on
perl5's C<Path::Iterator::Rule>. A C<Path::Finder> object is a
collection of rules (match criteria) with methods to add additional criteria.
Options that control directory traversal are given as arguments to the method
that generates an iterator. There is also a functional interface that is often
simpler in use, even if it allows for slightly less control.
Here is a summary of features for comparison to other file finding modules:
=item provides many "helper" methods for specifying rules
=item offers (lazy) list interface
=item custom rules implemented with roles
=item breadth-first (default) or pre- or post-order depth-first searching
=item follows symlinks (by default, but can be disabled)
=item directories visited only once (no infinite loop; can be disabled)
=item doesn't chdir during operation
=item provides an API for extensions
=head1 USAGE
Path::Finder objects are immutable. All methods except C<in> return a new object combining the existing rules with the additional rules provided.
=head2 Matching and iteration
=head3 C<in>
for $finder.in(@dirs, |%options) -> $file {
...
}
Creates a sequence of results. This sequence is "lazy" -- results are not
pre-computed.
It takes as arguments a list of directories to search and named arguments as
control options. If no search directories are provided, the
current directory is used (C<".">). Valid options include:
=item C<order> -- Controls order of results. Valid values are C<BreadthFirst> (breadth-first search), C<PreOrder> (pre-order, depth-first search), C<PostOrder> (post-order, depth-first search). The default is C<BreadthFirst>.
=item C<follow-symlinks> - Follow directory symlinks when true. Default is C<True>.
=item C<report-symlinks> - Includes symlinks in results when true. Default is equal to C<follow-symlinks>.
=item C<loop-safe> - Prevents visiting the same directory more than once when true. Default is C<True>.
=item C<relative> - Return matching items relative to the search directory. Default is C<False>.
=item C<sorted> - Whether entries in a directory are sorted before processing. Default is C<True>.
=item C<keep-going> - Whether or not the search should continue when an error is encountered (typically an unreadable directory). Defaults to C<True>.
=item C<quiet> - Whether printing non-fatal errors to C<$*ERR> is repressed. Defaults to C<False>.
=item C<as> - The type of values that will be returned. Valid values are C<IO::Path> (the default) and C<Str>.
Filesystem loops might exist from either hard or soft links. The C<loop-safe>
option prevents infinite loops, but adds some overhead by making C<stat> calls.
Because directories are visited only once when C<loop-safe> is true, matches
could come from a symlinked directory before the real directory depending on
the search order.
To get only the real files, turn off C<follow-symlinks>. You can have
symlinks included in results, but not descend into symlink directories if
you turn off C<follow-symlinks>, but turn on C<report-symlinks>.
Turning C<loop-safe> off and leaving C<follow-symlinks> on avoids C<stat> calls
and will be fastest, but with the risk of an infinite loop and repeated files.
The default is slow, but safe.
If the search directories are absolute and the C<relative> option is true,
files returned will be relative to the search directory. Note that if the
search directories are not mutually exclusive (whether containing
subdirectories like C<@*INC> or symbolic links), files found could be returned
relative to different initial search directories based on C<order>,
C<follow-symlinks> or C<loop-safe>.
=head2 Logic operations
C<Path::Finder> provides three logic operations for adding rules to the
object. Rules may be either a subroutine reference with specific semantics
or another C<Path::Finder> object.
=head3 C<and>
$finder.and($finder2) ;
$finder.and(-> $item, *% { $item ~~ :rwx });
$finder.and(@more-rules);
find(:and(@more-rules));
This creates a new rule combining the current one and the arguments. E.g.
"old rule AND new1 AND new2 AND ...".
=head3 C<or>
$finder.or(
Path::Finder.name("foo*"),
Path::Finder.name("bar*"),
-> $item, *% { $item ~~ :rwx },
);
This creates a new rule combining the current one and the arguments. E.g.
"old rule OR new1 OR new2 OR ...".
=head3 C<none>
$finder.none( -> $item, *% { $item ~~ :rwx } );
This creates a new rule combining the current one and one or more alternatives
and adds them as a negative constraint to the current rule. E.g. "old rule AND
NOT ( new1 OR new2 OR ...)". Returns the object to allow method chaining.
=head3 C<not>
$finder.not();
This creates a new rule negating the whole original rule. Returns the object to
allow method chaining.
=head3 C<skip>
$finder.skip(
$finder.new.directory.writable(False),
$finder.new.directory.name("foo"),
);
Takes one or more alternatives and will prune a directory if any of the
criteria match or if any of the rules already indicate the directory should be
pruned. Pruning means the directory will not be returned by the iterator and
will not be searched.
For files, it is equivalent to C<< $finder.none(@rules) >>. Returns
the object to allow method chaining.
This method should be called as early as possible in the rule chain.
See C<skip-dir> below for further explanation and an example.
=head1 RULE METHODS
Rule methods are helpers that add constraints. Internally, they generate a
closure to accomplish the desired logic and add it to the rule object with the
C<and> method. Rule methods return the object to allow for method chaining.
Generally speaking there are two kinds of rule methods: the ones that take a
value to smartmatch some property against (e.g. C<name>), and ones that take
a boolean (defaulting to C<True>) to check a boolean value against (e.g.
C<readable>).
=head2 File name rules
=head3 C<name>
$finder.name("foo.txt");
find(:name<foo.txt>);
The C<name> method takes a pattern and creates a rule that is true
if it matches the B<basename> of the file or directory path.
Patterns may be anything that can smartmatch a string. If it's a string
it will be interpreted as a L<glob|IO::Glob> pattern.
=head3 C<path>
$finder.path( "foo/*.txt" );
find(:path<foo/*.txt>);
The C<path> method takes a pattern and creates a rule that is true
if it matches the path of the file or directory.
Patterns may be anything that can smartmatch a string. If it's a string
it will be interpreted as a L<glob|IO::Glob> pattern.
=head3 C<relpath>
$finder.relpath( "foo/bar.txt" );
find(:relpath<foo/bar.txt>);
$finder.relpath( any(rx/foo/, "bar.*"));
find(:relpath(any(rx/foo/, "bar.*")));
The C<relpath> method takes a pattern and creates a rule that is true
if it matches the path of the file or directory relative to its basedir.
Patterns may be anything that can smartmatch a string. If it's a string
it will be interpreted as a L<glob|IO::Glob> pattern.
=head3 C<io>
$finder.io(:f|:d);
find(:io(:f|:d));
The C<io> method takes a pattern and creates a rule that is true
if it matches the C<IO> of the file or directory. This is mainly
useful for combining filetype tests.
=head3 C<ext>
The C<ext> method takes a pattern and creates a rule that is true
if it matches the extension of path. Patterns may be anything that can
smartmatch a string.
=head3 C<skip-dir>
$finder.skip-dir( $pattern );
The C<skip-dir> method skips directories that match a pattern. Directories
that match will not be returned from the iterator and will be excluded from
further search. B<This includes the starting directories.> If that isn't
what you want, see C<skip-subdir> instead.
B<Note:> this rule should be specified early so that it has a chance to
operate before a logical shortcut. E.g.
$finder.skip-dir(".git").file; # OK
$finder.file.skip-dir(".git"); # Won't work
In the latter case, when a ".git" directory is seen, the C<file> rule
shortcuts the rule before the C<skip-dir> rule has a chance to act.
=head3 C<skip-subdir>
$finder.skip-subdir( @patterns );
This works just like C<skip-dir>, except that the starting directories
(depth 0) are not skipped and may be returned from the iterator
unless excluded by other rules.
=head2 File test rules
Most of the C<:X> style filetest are available as boolean rules:
=head3 readable
This checks if the entry is readable
=head3 writable
This checks if the entry is writable
=head3 executable
This checks if the entry is executable
=head3 file
This checks if the entry is a file
=head3 directory
This checks if the entry is a directory
=head3 symlink
This checks if the entry is a symlink
=head3 special
This checks if the entry is anything but a file, directory or symlink.
=head3 exists
This checks if the entry exists
=head3 empty
This checks if the entry is empty
For example:
$finder.file.empty;
Two composites are also available:
=head3 read-writable
This checks if the entry is readable and writable
=head3 read-write-executable
This checks if the entry is readable, writable and executable
=head3 C<dangling>
$finder.dangling;
The C<dangling> rule method matches dangling symlinks. It's equivalent to
$finder.symlink.exists(False)
The timestamps methods take a single argument in a form that
can smartmatch an C<Instant>.
=head3 accessed
Compares the access time
=head3 modified
Compares the modification time
=head3 changed
Compares the (inode) change time
For example:
# hour old
$finder.modified(* < now - 60 * 60);
It also supports the following integer based matching rules:
=head3 size
This compares the size of the entry
=head3 mode
This compares the mode of the entry
=head3 device
This compares the device of the entry. This may not be available everywhere.
=head3 inode
This compares the inode of the entry. This may not be available everywhere.
=head3 nlinks
This compares the link count of the entry. This may not be available everywhere.
=head3 uid
This compares the user identifier of the entry.
=head3 gid
This compares the group identifier of the entry.
=head3 blocks
This compares the number of blocks in the entry.
=head3 blocksize
This compares the blocksize of the entry.
For example:
$finder.size(* > 10240)
=head2 Depth rules
$finder.depth(3..5);
The C<depth> rule method takes a single range argument and limits
the paths returned to a minimum or maximum depth (respectively) from the
starting search directory, or an integer representing a specific depth. A depth
of 0 means the starting directory itself. A depth of 1 means its children.
(This is similar to the Unix C<find> utility.)
=head2 Version control file rules
# Skip all known VCS files
$finder.skip-vcs;
Skips files and/or prunes directories related to a version control system.
Just like C<skip-dir>, these rules should be specified early to get the
correct behavior.
=head2 File content rules
=head3 C<contents>
$finder.contents(rx/BEGIN .* END/);
The C<contents> rule takes a list of regular expressions and returns
files that match one of the expressions.
The expressions are applied to the file's contents as a single string. For
large files, this is likely to take significant time and memory.
Files are assumed to be encoded in UTF-8, but alternative encodings can
be passed as a named argument:
$finder.contents(rx/BEGIN .* END/, :enc<latin1>);
=head3 C<lines>
$finder.lines(rx:i/^new/);
The C<lines> rule takes a list of regular expressions and returns
files with at least one line that matches one of the expressions.
Files are assumed to be encoded in UTF-8, but alternative Perl IO layers can
be passed like in C<contents>
=head3 C<no-lines>
$finder.no-lines(rx:i/^new/);
The C<no-lines> rule takes a list of regular expressions and returns
files with no lines that match any of the expressions.
Files are assumed to be encoded in UTF-8, but alternative Perl IO layers can
be passed like in C<contents>
=head3 C<shebang>
$finder.shebang(rx/#!.*\bperl\b/);
The C<shebang> rule takes a value and checks it against the first line of a
file. The default checks for C<rx/^#!/>.
=head2 Other rules
=head1 EXTENDING
=head2 Custom rule subroutines
Rules are implemented as (usually anonymous) subroutine callbacks that return
a value indicating whether or not the rule matches. These callbacks are called
with three arguments. The only positional argument is a path.
$finder.and( sub ($item, *%args) { $item ~~ :r & :w & :x } );
The named arguments contain more information for such a check.
For example, the C<depth> key is used to support minimum and maximum
depth checks.
The custom rule subroutine must return one of four values:
=item C<True> -- indicates the constraint is satisfied
=item C<False> -- indicates the constraint is not satisfied
=item C<PruneExclusive> -- indicate the constraint is satisfied, and prune if it's a directory
=item C<PruneInclusive> -- indicate the constraint is not satisfied, and prune if it's a directory
Here is an example. This is equivalent to the "depth" rule method with
a depth of C<0..3>:
$finder.and(
sub ($path, :$depth, *%) {
return $depth < 3 ?? True !! PruneExclusive;
}
);
Files and directories up to depth 3 will be returned and
directories will be searched. Files of depth 3 will be returned. Directories
of depth 3 will be returned, but their contents will not be added to the
search.
Once a directory is flagged to be pruned, it will be pruned regardless of
subsequent rules.
$finder.depth(0..3).name(rx/foo/);
This will return files or directories with "foo" in the name, but all
directories at depth 3 will be pruned, regardless of whether they match the
name rule.
Generally, if you want to do directory pruning, you are encouraged to use the
C<skip> method instead of writing your own logic using C<PruneExclusive> and
C<PruneInclusive>.
=head1 PERFORMANCE
By default, C<Path::Finder> iterator options are "slow but safe". They
ensure uniqueness, return files in sorted order, and throw nice error messages
if something goes wrong.
If you want speed over safety, set these options:
:!loop-safe, :!sorted, :order(PreOrder)
Depending on the file structure being searched, C<< :order(PreOrder) >> may or
may not be a good choice. If you have lots of nested directories and all the
files at the bottom, a depth first search might do less work or use less
memory, particularly if the search will be halted early (e.g. finding the first
N matches.)
Rules will shortcut on failure, so be sure to put rules likely to fail
early in a rule chain.
Consider:
$f1 = Path::Finder.new.name(rx/foo/).file;
$f2 = Path::Finder.new.file.name(rx/foo/);
If there are lots of files, but only a few containing "foo", then
C<$f1> above will be faster.
Rules are implemented as code references, so long chains have
some overhead. Consider testing with a custom coderef that
combines several tests into one.
Consider:
$f3 = Path::Finder.new.read-write-executable;
$f4 = Path::Finder.new.readable.writable.executable;
Rule C<$f3> above will be much faster, not only because it stacks
the file tests, but because it requires to only check a single rule.
=end pod