forked from schacon/perl
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DELTA] $Revision: 2.70 $ $Date: 2015/02/05 10:53:00 $ ! Makefile.PL add bin/encguess to EXE_FILES 2.69 2015/02/05 10:35:11 ! bin/encguess Refactored so that * does not depend on non-core module (File::Slurp in particular) * PODified document * -s "encA encB" to -s encA,encB which is more shell-friendly * and more ! MANIFEST + bin/encguess Pulled: Added CLI wrapper for Encode::Guess dankogai/p5-encode#32 ! Unicode/Unicode.pm Pulled: Bump $VERSION in module changed since Encode-2.60 dankogai/p5-encode#31
- Loading branch information
Showing
16 changed files
with
220 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#!./perl | ||
use 5.008001; | ||
use strict; | ||
use warnings; | ||
use Encode; | ||
use Getopt::Std; | ||
use Carp; | ||
use Encode::Guess; | ||
$Getopt::Std::STANDARD_HELP_VERSION = 1; | ||
|
||
# Command-line driver: parse the switches, then either list the known
# encodings (-S), show the usage text (-h, or no file arguments), or guess
# the encoding of every file named on the command line.
my %opt;
getopts( "huSs:", \%opt );
my @suspect_list;
list_valid_suspects() and exit if $opt{S};

# -s takes suspects separated by ':' or ','.  A character class is needed
# here: the pattern /:,/ matches only the two-character sequence ":," and
# would leave "euc-jp,shiftjis" as one single (invalid) encoding name.
@suspect_list = split /[:,]/, $opt{s} if $opt{s};
HELP_MESSAGE() if $opt{h};
HELP_MESSAGE() unless @ARGV;
do_guess($_) for @ARGV;
|
||
# Slurp one file and return its contents as a single string of raw bytes
# (the handle is opened with the :raw layer).
# Croaks with "<filename>:<OS error>" when the file cannot be opened.
sub read_file {
    my ($path) = @_;
    open my $in, '<:raw', $path or croak "$path:$!";

    # Undefine the record separator only while reading, so the whole file
    # comes back from a single readline.
    my $bytes = do { local $/; <$in> };
    close $in;
    return $bytes;
}
|
||
# Guess the encoding of one file and print "<filename>\t<encoding>\n".
#
# The file is read in full with read_file() and the bytes are passed to
# guess_encoding() together with the suspects collected in @suspect_list.
# A reference result is treated as an encoding object; any non-reference
# result is reported as "unknown", or not reported at all when -u is set.
# Always returns 1.
sub do_guess {
    my $filename = shift;
    my $data     = read_file($filename);
    my $enc      = guess_encoding( $data, @suspect_list );

    # -u: stay silent about files that could not be identified.
    return 1 if !ref($enc) && $opt{u};

    my $label = 'unknown';
    if ( ref($enc) ) {

        # Not every encoding has a registered MIME name.  Fall back to the
        # encoding's own name instead of printing undef, which would give
        # an empty column and an "uninitialized value" warning.
        $label = $enc->mime_name();
        $label = $enc->name() unless defined $label;
    }
    print "$filename\t$label\n";
    return 1;
}
|
||
# Print every encoding name returned by Encode->encodings(":all"), one per
# line, to the currently selected output handle.
# Always returns 1.
sub list_valid_suspects {
    my @names = Encode->encodings(":all");
    print join( "\n", @names ), "\n";
    return 1;
}
|
||
# Show the usage text by replacing the current process with "pod2usage"
# run on this script ($0).  Dies with "pod2usage: <OS error>" if the exec
# itself fails; on success this sub never returns.
sub HELP_MESSAGE {
    my @usage_cmd = ( 'pod2usage', $0 );
    exec(@usage_cmd) or die "pod2usage: $!";
}
__END__ | ||
=head1 NAME | ||
encguess - guess character encodings of files | ||
=head1 VERSION | ||
$Id: encguess,v 0.1 2015/02/05 10:34:19 dankogai Exp $ | ||
=head1 SYNOPSIS | ||
encguess [switches] filename... | ||
=head2 SWITCHES | ||
=over 2 | ||
=item -h | ||
show this message and exit. | ||
=item -s | ||
specify a list of "suspect encoding types" to test, | ||
separated by either C<:> or C<,> | ||
=item -S | ||
output a list of all acceptable encoding types that can be used with | ||
the -s param | ||
=item -u | ||
suppress display of unidentified types | ||
=back | ||
=head2 EXAMPLES: | ||
=over 2 | ||
=item * | ||
Guess encoding of a file named C<test.txt>, using only the default | ||
suspect types. | ||
encguess test.txt | ||
=item * | ||
Guess the encoding type of a file named C<test.txt>, using the suspect | ||
types C<euc-jp,shiftjis,7bit-jis>. | ||
encguess -s euc-jp,shiftjis,7bit-jis test.txt | ||
encguess -s euc-jp:shiftjis:7bit-jis test.txt | ||
=item * | ||
Guess the encoding type of several files, do not display results for | ||
unidentified files. | ||
encguess -us euc-jp,shiftjis,7bit-jis test*.txt | ||
=back | ||
=head1 DESCRIPTION | ||
The encoding identification is done by checking one encoding type at a | ||
time until all but the right type are eliminated. The set of encoding | ||
types to try is defined by the -s parameter and defaults to ascii, | ||
utf8 and UTF-16/32 with BOM. This can be overridden by passing one or | ||
more encoding types via the -s parameter. If you need to pass in | ||
multiple suspect encoding types, separate them with either C<:> or | ||
C<,>. | ||
=head1 SEE ALSO | ||
L<Encode::Guess>, L<Encode::Detect> | ||
=head1 LICENSE AND COPYRIGHT | ||
Copyright 2015 Michael LaGrasta and Dan Kogai. | ||
This program is free software; you can redistribute it and/or modify it | ||
under the terms of the Artistic License (2.0). You may obtain a | ||
copy of the full license at: | ||
L<http://www.perlfoundation.org/artistic_license_2_0> | ||
=cut |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/usr/local/bin/perl | ||
|
||
use Config; | ||
use File::Basename qw(&basename &dirname); | ||
use Cwd; | ||
|
||
# List explicitly here the variables you want Configure to | ||
# generate. Metaconfig only looks for shell variables, so you | ||
# have to mention them as if they were shell variables, not | ||
# %Config entries. Thus you write | ||
# $startperl | ||
# to ensure Configure will look for $Config{startperl}. | ||
|
||
# This forces PL files to create target in same directory as PL file. | ||
# This is so that make depend always knows where to find PL derivatives. | ||
# Extraction driver: create the target file (this script's name minus the
# ".PL" suffix, plus ".com" on VMS) in the script's own directory, write a
# Configure-derived start-up header into it, then append the contents of
# ../cpan/Encode/bin/encguess and make the result executable.

# This forces PL files to create target in same directory as PL file.
# This is so that make depend always knows where to find PL derivatives.
my $origdir = cwd;
chdir dirname($0);
my $file = basename($0, '.PL');
$file .= '.com' if $^O eq 'VMS';

# Three-argument open with a lexical handle: the mode can never be parsed
# out of the file name, and the handle is not a package-global bareword.
open my $out, '>', $file or die "Can't create $file: $!";

print "Extracting $file (with variable substitutions)\n";

# In this section, perl variables will be expanded during extraction.
# You can use $Config{...} to use Configure variables.

print {$out} <<"!GROK!THIS!";
$Config{startperl}
eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
if \$running_under_some_shell;
!GROK!THIS!

use File::Spec;

# Path of the script whose text is appended after the header.
my $encguess_src = File::Spec->catfile(File::Spec->catdir(File::Spec->updir, "cpan", "Encode", "bin"), "encguess");

if (open(my $in, '<', $encguess_src)) {
    print {$out} <$in>;
    close $in;
} else {
    die "$0: cannot find '$encguess_src'\n";
}

# Buffered write errors only surface at close, so the close is checked.
close $out or die "Can't close $file: $!";
chmod 0755, $file or die "Can't reset permissions for $file: $!\n";

# When Configure set eunicefix to something other than the no-op ':',
# hand the generated file over to it; exec does not return on success.
exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
chdir $origdir;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters