Skip to content

Commit

Permalink
Added a filetype of "--text", which matches any text file. This
is off by default, just as --binary is.
Browse files Browse the repository at this point in the history

Added a filetype of "--ignored", which matches any file that
ack ignores, like core dumps and backup files.  It will NOT
include files that are in ignored directories like blib/ and
.svn/
  • Loading branch information
petdance committed Jun 13, 2007
1 parent 1ebff69 commit 4484844
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 61 deletions.
20 changes: 12 additions & 8 deletions Ack.pm
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ BEGIN {
elisp => [qw( el )],
haskell => [qw( hs lhs )],
html => [qw( htm html shtml )],
ignored => q{Files, but not directories, normally ignored by ack (default: off)},
lisp => [qw( lisp )],
java => [qw( java properties )],
js => [qw( js )],
Expand All @@ -55,6 +56,7 @@ BEGIN {
sql => [qw( sql ctl )],
tcl => [qw( tcl )],
tex => [qw( tex cls sty )],
text => q{Text files, as defined by Perl's -T op (default: off)},
tt => [qw( tt tt2 ttml )],
vb => [qw( bas cls frm ctl vb resx )],
vim => [qw( vim )],
Expand Down Expand Up @@ -105,22 +107,24 @@ F<foo.pod> could be "perl" or "parrot".
The filetype will be C<undef> if we can't determine it. This can
happen if the file doesn't exist or can't be read.
It will be '-ignore' if it's something that ack should always ignore,
It will be 'ignored' if it's something that ack should always ignore,
even under -a.
=cut

use constant TEXT => 'text';

sub filetypes {
my $filename = shift;

return '-ignore' unless is_searchable( $filename );
return 'ignored' unless is_searchable( $filename );

return 'make' if $filename =~ m{$path_sep?Makefile$}io;
return ('make',TEXT) if $filename =~ m{$path_sep?Makefile$}io;

# If there's an extension, look it up
if ( $filename =~ m{\.([^\.$path_sep]+)$}o ) {
my $ref = $types{lc $1};
return @{$ref} if $ref;
return (@{$ref},TEXT) if $ref;
}

# At this point, we can't tell from just the name. Now we have to
Expand Down Expand Up @@ -155,12 +159,12 @@ sub filetypes {
}

if ( $header =~ /^#!/ ) {
return $1 if $header =~ /\b(ruby|p(erl|hp|ython))\b/;
return 'shell' if $header =~ /\b(?:ba|c|k|z)?sh\b/;
return ($1,TEXT) if $header =~ /\b(ruby|p(erl|hp|ython))\b/;
return ('shell','text') if $header =~ /\b(?:ba|c|k|z)?sh\b/;
}
return 'xml' if $header =~ /<\?xml /;
return ('xml',TEXT) if $header =~ /<\?xml /;

return;
return (TEXT);
}

=head2 is_searchable( $filename )
Expand Down
9 changes: 9 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ NEXT
set this useful environment variable, your tests would fail.
Thanks, Michael Hendricks.

[ENHANCEMENTS]
Added a filetype of "--text", which matches any text file. This
is off by default, just as --binary is.

Added a filetype of "--ignored", which matches any file that
ack ignores, like core dumps and backup files. It will NOT
include files that are in ignored directories like blib/ and
.svn/

1.63_01
[THINGS THAT MAY BREAK FOR YOU]
The changes I made to detect if ack is outputting to the screen
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ t/00-load.t
t/ack-a.t
t/ack-binary.t
t/ack-c.t
t/ack-text.t
t/ack-type.t
t/ack-v.t
t/filetypes.t
Expand Down
2 changes: 1 addition & 1 deletion ack
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ MAIN: {
# If anyone says --no-whatever, we assume all other types must be on.
if ( !$filetypes_supported_set ) {
for my $i ( keys %type_wanted ) {
$type_wanted{$i} = 1 unless ( defined( $type_wanted{$i} ) || $i eq 'binary' || $i eq 'text' );
$type_wanted{$i} = 1 unless ( defined( $type_wanted{$i} ) || $i eq 'binary' || $i eq 'text' || $i eq 'ignored' );
}
}

Expand Down
2 changes: 2 additions & 0 deletions ack-help-types.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Note that some extensions may appear in multiple types. For example,
--[no]elisp .el
--[no]haskell .hs .lhs
--[no]html .htm .html .shtml
--[no]ignored Files, but not directories, normally ignored by ack (default: off)
--[no]java .java .properties
--[no]js .js
--[no]jsp .jsp .jspx .jhtm .jhtml
Expand All @@ -33,6 +34,7 @@ Note that some extensions may appear in multiple types. For example,
--[no]sql .sql .ctl
--[no]tcl .tcl
--[no]tex .tex .cls .sty
--[no]text Text files, as defined by Perl's -T op (default: off)
--[no]tt .tt .tt2 .ttml
--[no]vb .bas .cls .frm .ctl .vb .resx
--[no]vim .vim
Expand Down
21 changes: 13 additions & 8 deletions ack-standalone
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ MAIN: {
'o|output:s' => \$opt{o},
'Q|literal' => \$opt{Q},
'sort-files' => \$opt{sort_files},
'text' => \$opt{text},
'v|invert-match' => \$opt{v},
'w|word-regexp' => \$opt{w},

Expand Down Expand Up @@ -133,7 +134,7 @@ MAIN: {
# If anyone says --no-whatever, we assume all other types must be on.
if ( !$filetypes_supported_set ) {
for my $i ( keys %type_wanted ) {
$type_wanted{$i} = 1 unless ( defined( $type_wanted{$i} ) || $i eq 'binary' );
$type_wanted{$i} = 1 unless ( defined( $type_wanted{$i} ) || $i eq 'binary' || $i eq 'text' || $i eq 'ignored' );
}
}

Expand Down Expand Up @@ -905,6 +906,7 @@ BEGIN {
elisp => [qw( el )],
haskell => [qw( hs lhs )],
html => [qw( htm html shtml )],
ignored => q{Files, but not directories, normally ignored by ack (default: off)},
lisp => [qw( lisp )],
java => [qw( java properties )],
js => [qw( js )],
Expand All @@ -922,6 +924,7 @@ BEGIN {
sql => [qw( sql ctl )],
tcl => [qw( tcl )],
tex => [qw( tex cls sty )],
text => q{Text files, as defined by Perl's -T op (default: off)},
tt => [qw( tt tt2 ttml )],
vb => [qw( bas cls frm ctl vb resx )],
vim => [qw( vim )],
Expand Down Expand Up @@ -950,17 +953,19 @@ sub skipdir_filter {
}


use constant TEXT => 'text';

sub filetypes {
my $filename = shift;

return '-ignore' unless is_searchable( $filename );
return 'ignored' unless is_searchable( $filename );

return 'make' if $filename =~ m{$path_sep?Makefile$}io;
return ('make',TEXT) if $filename =~ m{$path_sep?Makefile$}io;

# If there's an extension, look it up
if ( $filename =~ m{\.([^\.$path_sep]+)$}o ) {
my $ref = $types{lc $1};
return @{$ref} if $ref;
return (@{$ref},TEXT) if $ref;
}

# At this point, we can't tell from just the name. Now we have to
Expand Down Expand Up @@ -995,12 +1000,12 @@ sub filetypes {
}

if ( $header =~ /^#!/ ) {
return $1 if $header =~ /\b(ruby|p(erl|hp|ython))\b/;
return 'shell' if $header =~ /\b(?:ba|c|k|z)?sh\b/;
return ($1,TEXT) if $header =~ /\b(ruby|p(erl|hp|ython))\b/;
return ('shell','text') if $header =~ /\b(?:ba|c|k|z)?sh\b/;
}
return 'xml' if $header =~ /<\?xml /;
return ('xml',TEXT) if $header =~ /<\?xml /;

return;
return (TEXT);
}


Expand Down
72 changes: 72 additions & 0 deletions t/ack-text.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!perl

use warnings;
use strict;

use Test::More tests => 1;
use App::Ack ();
use File::Next ();
delete $ENV{ACK_OPTIONS};

use lib 't';
use Util;


# Run "ack-standalone -f --text" over the t/ directory and check that the
# set of files it lists is exactly the set we expect to be reported as text.
ACK_F_TEXT: {
    # Files expected in the listing; order is irrelevant because the
    # comparison below is done with sets_match().
    my $expected_files = [ qw(
        t/00-load.t
        t/ack-a.t
        t/ack-binary.t
        t/ack-c.t
        t/ack-text.t
        t/ack-type.t
        t/ack-v.t
        t/etc/buttonhook.html.xxx
        t/etc/buttonhook.noxml.xxx
        t/etc/buttonhook.rfc.xxx
        t/etc/buttonhook.rss.xxx
        t/etc/buttonhook.xml.xxx
        t/etc/shebang.foobar.xxx
        t/etc/shebang.php.xxx
        t/etc/shebang.pl.xxx
        t/etc/shebang.py.xxx
        t/etc/shebang.rb.xxx
        t/etc/shebang.sh.xxx
        t/filetypes.t
        t/interesting.t
        t/longopts.t
        t/pod-coverage.t
        t/pod.t
        t/standalone.t
        t/swamp/0
        t/swamp/c-header.h
        t/swamp/c-source.c
        t/swamp/html.htm
        t/swamp/html.html
        t/swamp/javascript.js
        t/swamp/Makefile
        t/swamp/Makefile.PL
        t/swamp/options.pl
        t/swamp/parrot.pir
        t/swamp/perl-test.t
        t/swamp/perl-without-extension
        t/swamp/perl.cgi
        t/swamp/perl.pl
        t/swamp/perl.pm
        t/swamp/perl.pod
        t/text/boy-named-sue.txt
        t/text/freedom-of-choice.txt
        t/text/science-of-myth.txt
        t/text/shut-up-be-happy.txt
        t/Util.pm
        t/zero.t
    ) ];

    # Build the command line: the running perl, the standalone script,
    # the options under test, and the directory to scan.
    my $command = join q{ }, $^X, './ack-standalone', '-f', '--text', 't';

    # One output line per file; strip the trailing newlines before comparing.
    chomp( my @listed = `$command` );

    sets_match( \@listed, $expected_files, 'Looking for text files' );
}
1 change: 1 addition & 0 deletions t/ack-type.t
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ my $perl = [qw(
t/ack-a.t
t/ack-binary.t
t/ack-c.t
t/ack-text.t
t/ack-type.t
t/ack-v.t
t/etc/shebang.pl.xxx
Expand Down
78 changes: 34 additions & 44 deletions t/filetypes.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
use warnings;
use strict;

use Test::More tests => 28;
use Data::Dumper;
use Test::More tests => 29;
delete $ENV{ACK_OPTIONS};

use lib 't';
Expand All @@ -14,64 +13,55 @@ BEGIN {
use_ok( 'App::Ack' );
}

my @foo_pod_types = App::Ack::filetypes( 'foo.pod' ); # 5.6.1 doesn't like to sort(filetypes())
sets_match( [@foo_pod_types], [qw( parrot perl )], 'foo.pod can be multiple things' );
sets_match( [App::Ack::filetypes( 'Bongo.pm' )], [qw( perl )], 'Bongo.pm' );
sets_match( [App::Ack::filetypes( 'Makefile.PL' )], [qw( perl )], 'Makefile.PL' );
sets_match( [App::Ack::filetypes( 'foo.pod' )], [qw( parrot perl text )], 'foo.pod can be multiple things' );
sets_match( [App::Ack::filetypes( 'Bongo.pm' )], [qw( perl text )], 'Bongo.pm' );
sets_match( [App::Ack::filetypes( 'Makefile.PL' )], [qw( perl text )], 'Makefile.PL' );
sets_match( [App::Ack::filetypes( 'Unknown.wango' )], [], 'Unknown' );

# Membership checks: is_filetype( $filename, $type ) should be true when
# $type is one of the types reported for $filename.
ok( is_filetype( 'foo.pod', 'perl' ), 'foo.pod can be perl' );
ok( is_filetype( 'foo.pod', 'parrot' ), 'foo.pod can be parrot' );
# The description used to repeat the parrot message; it now names the
# type actually being checked so a failure points at the right assertion.
ok( is_filetype( 'foo.pod', 'text' ), 'foo.pod can be text' );
ok( !is_filetype( 'foo.pod', 'ruby' ), 'foo.pod cannot be ruby' );
# Extra dots and leading directories in the name must not affect the lookup.
ok( is_filetype( 'foo.handler.pod', 'perl' ), 'foo.handler.pod can be perl' );
ok( is_filetype( '/tmp/wango/foo.pod', 'perl' ), '/tmp/wango/foo.pod can be perl' );
ok( is_filetype( '/tmp/wango/foo.handler.pod', 'perl' ), '/tmp/wango/foo.handler.pod can be perl' );
# Makefiles are expected to be recognised in either letter case.
ok( is_filetype( '/tmp/blongo/makefile', 'make' ), '/tmp/blongo/makefile is a makefile' );
ok( is_filetype( 'Makefile', 'make' ), 'Makefile is a makefile' );

is(App::Ack::filetypes('foo.pod~'), '-ignore',
is(App::Ack::filetypes('foo.pod~'), 'ignored',
'correctly ignore backup file');

is(App::Ack::filetypes('#some.pod#'), '-ignore',
is(App::Ack::filetypes('#some.pod#'), 'ignored',
'correctly ignore files starting and ending with hash mark');

is(App::Ack::filetypes('core.987654321'), '-ignore',
is(App::Ack::filetypes('core.987654321'), 'ignored',
'correctly ignore files named core.NNNN');

is(App::Ack::filetypes('t/etc/shebang.pl.xxx'), 'perl',
'file identified as Perl from shebang line');

is(App::Ack::filetypes('t/etc/shebang.php.xxx'), 'php',
'file identified as PHP from shebang line');

is(App::Ack::filetypes('t/etc/shebang.py.xxx'), 'python',
'file identified as Python from shebang line');

is(App::Ack::filetypes('t/etc/shebang.rb.xxx'), 'ruby',
'file identified as Ruby from shebang line');

is(App::Ack::filetypes('t/etc/shebang.sh.xxx'), 'shell',
'file identified as shell from shebang line');

ok(! defined App::Ack::filetypes('t/etc/shebang.foobar.xxx'),
'file could not be identified from shebang line');

is(App::Ack::filetypes('t/etc/shebang.empty.xxx'), 'binary',
'empty file returns "binary"');

## Tests documenting current behavior in 1.50
is(App::Ack::filetypes('t/etc/buttonhook.xml.xxx'), 'xml',
'file identified as xml from <?xml line');

ok(! defined App::Ack::filetypes('t/etc/buttonhook.noxml.xxx'),
'no <?xml> found, so no filetype');


is(App::Ack::filetypes('t/etc/buttonhook.xml.xxx'),'xml',
'filetype by <?xml>');
MATCH_VIA_CONTENT: {
my %lookups = (
't/swamp/Makefile' => 'make',
't/swamp/Makefile.PL' => 'perl',
't/swamp/buttonhook.xml' => 'xml',
't/etc/shebang.php.xxx' => 'php',
't/etc/shebang.pl.xxx' => 'perl',
't/etc/shebang.py.xxx' => 'python',
't/etc/shebang.rb.xxx' => 'ruby',
't/etc/shebang.sh.xxx' => 'shell',
't/etc/buttonhook.xml.xxx' => 'xml',
);
for my $filename ( sort keys %lookups ) {
my $type = $lookups{$filename};
sets_match( [App::Ack::filetypes( $filename )], [ $type, 'text' ], "Checking $filename" );
}

is(App::Ack::filetypes('t/etc/shebang.empty.xxx'), 'binary',
'empty file returns "binary"');
}

sets_match([App::Ack::filetypes('t/swamp/buttonhook.xml')], ['xml'],
'file identified as xml ');
FAIL_MATCHING_VIA_CONTENT: {
is( App::Ack::filetypes('t/etc/shebang.foobar.xxx'), 'text',
'file could not be identified from shebang line');

ok(! defined App::Ack::filetypes('t/etc/x.html.xxx'),
'<!DOCTYPE not yet supported so no filetype');
is( App::Ack::filetypes('t/etc/buttonhook.noxml.xxx'), 'text',
'no <?xml> found, so no filetype');
}

0 comments on commit 4484844

Please sign in to comment.